library(readr)
library(dplyr)
package ‘dplyr’ was built under R version 3.4.4
Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union
library(tidyr)
package ‘tidyr’ was built under R version 3.4.4
library(ggplot2)
package ‘ggplot2’ was built under R version 3.4.4RStudio Community is a great place to get help: https://community.rstudio.com/c/tidyverse.
library(glmnet)
package ‘glmnet’ was built under R version 3.4.4Loading required package: Matrix
package ‘Matrix’ was built under R version 3.4.4
Attaching package: ‘Matrix’

The following object is masked from ‘package:tidyr’:

    expand

Loading required package: foreach
package ‘foreach’ was built under R version 3.4.3Loaded glmnet 2.0-16
library(gbm)
package ‘gbm’ was built under R version 3.4.4Loaded gbm 2.1.4
library(gam)
package ‘gam’ was built under R version 3.4.4Loading required package: splines
Loaded gam 1.16
library(stringr)
package ‘stringr’ was built under R version 3.4.4
library(xgboost)
package ‘xgboost’ was built under R version 3.4.4
Attaching package: ‘xgboost’

The following object is masked from ‘package:dplyr’:

    slice
library(caret)
package ‘caret’ was built under R version 3.4.4Loading required package: lattice
unknown timezone 'zone/tz/2018g.1.0/zoneinfo/America/New_York'
library(Matrix)
library(e1071)
package ‘e1071’ was built under R version 3.4.4
library(liquidSVM)

Attaching package: ‘liquidSVM’

The following object is masked from ‘package:e1071’:

    svm
# read data
read_csv("Data/wine-reviews/winemag-data-130k-v2.csv") %>% select(-X1) %>% unique -> data
Missing column names filled in: 'X1' [1]Parsed with column specification:
cols(
  X1 = col_integer(),
  country = col_character(),
  description = col_character(),
  designation = col_character(),
  points = col_integer(),
  price = col_double(),
  province = col_character(),
  region_1 = col_character(),
  region_2 = col_character(),
  taster_name = col_character(),
  taster_twitter_handle = col_character(),
  title = col_character(),
  variety = col_character(),
  winery = col_character()
)

|==                                                                                        |   3%    1 MB
|===                                                                                       |   3%    1 MB
|===                                                                                       |   4%    2 MB
|====                                                                                      |   4%    2 MB
|====                                                                                      |   5%    2 MB
|=====                                                                                     |   5%    3 MB
|=====                                                                                     |   6%    3 MB
|======                                                                                    |   7%    3 MB
|======                                                                                    |   7%    3 MB
|=======                                                                                   |   8%    4 MB
|=======                                                                                   |   8%    4 MB
|========                                                                                  |   9%    4 MB
|========                                                                                  |   9%    4 MB
|=========                                                                                 |  10%    5 MB
|=========                                                                                 |  10%    5 MB
|==========                                                                                |  11%    5 MB
|==========                                                                                |  12%    6 MB
|===========                                                                               |  12%    6 MB
|===========                                                                               |  13%    6 MB
|============                                                                              |  13%    6 MB
|============                                                                              |  14%    7 MB
|=============                                                                             |  14%    7 MB
|=============                                                                             |  15%    7 MB
|==============                                                                            |  15%    8 MB
|==============                                                                            |  16%    8 MB
|===============                                                                           |  16%    8 MB
|===============                                                                           |  17%    8 MB
|================                                                                          |  18%    9 MB
|================                                                                          |  18%    9 MB
|=================                                                                         |  19%    9 MB
|=================                                                                         |  19%    9 MB
|==================                                                                        |  20%   10 MB
|==================                                                                        |  20%   10 MB
|===================                                                                       |  21%   10 MB
|===================                                                                       |  21%   11 MB
|====================                                                                      |  22%   11 MB
|====================                                                                      |  23%   11 MB
|=====================                                                                     |  23%   11 MB
|=====================                                                                     |  24%   12 MB
|======================                                                                    |  24%   12 MB
|======================                                                                    |  25%   12 MB
|=======================                                                                   |  25%   12 MB
|=======================                                                                   |  26%   13 MB
|========================                                                                  |  26%   13 MB
|========================                                                                  |  27%   13 MB
|=========================                                                                 |  27%   14 MB
|=========================                                                                 |  28%   14 MB
|==========================                                                                |  29%   14 MB
|==========================                                                                |  29%   14 MB
|===========================                                                               |  30%   15 MB
|===========================                                                               |  30%   15 MB
|============================                                                              |  31%   15 MB
|============================                                                              |  31%   16 MB
|=============================                                                             |  32%   16 MB
|=============================                                                             |  32%   16 MB
|==============================                                                            |  33%   16 MB
|==============================                                                            |  33%   17 MB
|===============================                                                           |  34%   17 MB
|===============================                                                           |  35%   17 MB
|================================                                                          |  35%   18 MB
|================================                                                          |  36%   18 MB
|=================================                                                         |  36%   18 MB
|=================================                                                         |  37%   18 MB
|==================================                                                        |  37%   19 MB
|==================================                                                        |  38%   19 MB
|===================================                                                       |  38%   19 MB
|===================================                                                       |  39%   19 MB
|====================================                                                      |  40%   20 MB
|====================================                                                      |  40%   20 MB
|=====================================                                                     |  41%   20 MB
|=====================================                                                     |  41%   21 MB
|======================================                                                    |  42%   21 MB
|======================================                                                    |  42%   21 MB
|=======================================                                                   |  43%   21 MB
|=======================================                                                   |  43%   22 MB
|========================================                                                  |  44%   22 MB
|========================================                                                  |  45%   22 MB
|=========================================                                                 |  45%   23 MB
|==========================================                                                |  46%   23 MB
|==========================================                                                |  46%   23 MB
|==========================================                                                |  47%   23 MB
|===========================================                                               |  47%   24 MB
|===========================================                                               |  48%   24 MB
|============================================                                              |  48%   24 MB
|============================================                                              |  49%   24 MB
|=============================================                                             |  49%   25 MB
|=============================================                                             |  50%   25 MB
|==============================================                                            |  51%   25 MB
|==============================================                                            |  51%   26 MB
|===============================================                                           |  52%   26 MB
|===============================================                                           |  52%   26 MB
|================================================                                          |  53%   26 MB
|================================================                                          |  53%   27 MB
|=================================================                                         |  54%   27 MB
|=================================================                                         |  54%   27 MB
|==================================================                                        |  55%   27 MB
|==================================================                                        |  55%   28 MB
|===================================================                                       |  56%   28 MB
|===================================================                                       |  57%   28 MB
|====================================================                                      |  57%   29 MB
|====================================================                                      |  58%   29 MB
|=====================================================                                     |  58%   29 MB
|=====================================================                                     |  59%   29 MB
|======================================================                                    |  59%   30 MB
|======================================================                                    |  60%   30 MB
|=======================================================                                   |  60%   30 MB
|=======================================================                                   |  61%   31 MB
|========================================================                                  |  62%   31 MB
|========================================================                                  |  62%   31 MB
|=========================================================                                 |  63%   31 MB
|=========================================================                                 |  63%   32 MB
|==========================================================                                |  64%   32 MB
|==========================================================                                |  64%   32 MB
|===========================================================                               |  65%   32 MB
|============================================================                              |  65%   33 MB
|============================================================                              |  66%   33 MB
|=============================================================                             |  67%   33 MB
|=============================================================                             |  67%   34 MB
|==============================================================                            |  68%   34 MB
|==============================================================                            |  68%   34 MB
|===============================================================                           |  69%   34 MB
|===============================================================                           |  69%   35 MB
|================================================================                          |  70%   35 MB
|================================================================                          |  70%   35 MB
|=================================================================                         |  71%   36 MB
|=================================================================                         |  72%   36 MB
|==================================================================                        |  72%   36 MB
|==================================================================                        |  73%   36 MB
|===================================================================                       |  73%   37 MB
|===================================================================                       |  74%   37 MB
|===================================================================                       |  74%   37 MB
|====================================================================                      |  75%   37 MB
|====================================================================                      |  75%   38 MB
|=====================================================================                     |  76%   38 MB
|=====================================================================                     |  76%   38 MB
|======================================================================                    |  77%   39 MB
|======================================================================                    |  78%   39 MB
|=======================================================================                   |  78%   39 MB
|=======================================================================                   |  79%   39 MB
|========================================================================                  |  79%   40 MB
|========================================================================                  |  80%   40 MB
|=========================================================================                 |  80%   40 MB
|==========================================================================                |  81%   41 MB
|==========================================================================                |  81%   41 MB
|==========================================================================                |  82%   41 MB
|===========================================================================               |  82%   41 MB
|===========================================================================               |  83%   42 MB
|============================================================================              |  84%   42 MB
|============================================================================              |  84%   42 MB
|=============================================================================             |  85%   42 MB
|=============================================================================             |  85%   43 MB
|==============================================================================            |  86%   43 MB
|==============================================================================            |  86%   43 MB
|===============================================================================           |  87%   44 MB
|===============================================================================           |  87%   44 MB
|================================================================================          |  88%   44 MB
|================================================================================          |  88%   44 MB
|=================================================================================         |  89%   45 MB
|=================================================================================         |  90%   45 MB
|==================================================================================        |  90%   45 MB
|==================================================================================        |  91%   46 MB
|===================================================================================       |  91%   46 MB
|===================================================================================       |  92%   46 MB
|====================================================================================      |  92%   46 MB
|====================================================================================      |  93%   47 MB
|=====================================================================================     |  93%   47 MB
|=====================================================================================     |  94%   47 MB
|======================================================================================    |  95%   47 MB
|======================================================================================    |  95%   48 MB
|=======================================================================================   |  96%   48 MB
|=======================================================================================   |  96%   48 MB
|========================================================================================  |  97%   49 MB
|========================================================================================  |  97%   49 MB
|========================================================================================= |  98%   49 MB
|==========================================================================================|  98%   49 MB
|==========================================================================================|  99%   50 MB
|===========================================================================================| 100%   50 MB
# preprocessing
scale_taster <- function(points){
    # takes a vector of numbers, subtracts every element by the mean of the vector, and then
    # divides every element by the standard deviation of the vector
    
    return((points - mean(points, na.rm = TRUE)) / sd(points, na.rm = TRUE))
}
percentile_taster <- function(x){
    # takes a vector of numbers, ranks every element and divides by n, giving the percentile of each element
    trunc(rank(x))/length(x) * 100
}
data <- data %>% group_by(taster_name) %>% mutate("Scaled_Points" = scale_taster(points))
data <- data %>% group_by(taster_name) %>% mutate("Percentile_Points" = percentile_taster(points))
tab <- data %>% group_by(province) %>% summarize("Proportion" = n()/nrow(data))
tab <- tab[tab$Proportion > 0.01, ]
tabcountry <-  data %>% group_by(country) %>% summarize("Proportion" = n()/nrow(data))
tabcountry <- tabcountry[tabcountry$Proportion > 0.01, ]
data$country_other <- ifelse(data$country %in% tabcountry$country, 
                                paste0(data$country, "_other"), data$country)
data$location <- ifelse(data$province %in% tab$province, data$province,
                                     data$country_other)
year <- str_extract_all(data$title, "[1-2][09][0-9]{2}")
data$year <- lapply(year, function(x){
    x = x %>% as.numeric
  if(!all(is.na(x))){
    newx <- x[(x > 1900) & (x < 2018)]
    if (!all(is.na(newx))) {
      newx <- max(newx)
      return(newx)
    } else {
      return(NA)
    }
  } else {
    return(NA)
  }}) %>% unlist
data$location <- factor(data$location)
data$taster_name <- factor(data$taster_name)
data$taster_name <- addNA(data$taster_name)
data$title <- factor(data$title)
data$variety <- factor(data$variety)
data$region_1 <- factor(data$region_1)
data$region_2 <- factor(data$region_2)
data$country <- factor(data$country)
data$province <- factor(data$province)
data$winery <- factor(data$winery)
data$taster_twitter_handle <- factor(data$taster_twitter_handle)
data$designation <- factor(data$designation)
# helper function
impute_mean <- function(x) replace(x, is.na(x), mean(x, na.rm = TRUE))
    # impute_mean replaces missing values with the average value of a group
clean <- function(df){
    # clean removes the varieties that only have missing prices, and are thus unimputable by our rule,
    # and then it imputes the remaining missing prices using the average price of that wine's variety
    
    df %>% group_by(variety) %>% summarize("Average_Price" = mean(price, na.rm = T), 
                                           "Count" = n()) %>% 
    filter(is.na(Average_Price)) %>% select(variety) %>% unlist() -> drop_variety 
    
    df %>% filter(!(variety %in% drop_variety)) -> sample2
    
    sample2 %>% group_by(variety) %>% mutate(price = impute_mean(price)) -> sample2
    
    sample2 <- sample2[complete.cases(sample2),]
    return(sample2)
}
# check data
dim(data)
[1] 119988     18
summary(data)
     country      description              designation        points           price        
 US      :50457   Length:119988      Reserve     : 1871   Min.   : 80.00   Min.   :   4.00  
 France  :20353   Class :character   Estate      : 1223   1st Qu.: 86.00   1st Qu.:  17.00  
 Italy   :17940   Mode  :character   Reserva     : 1176   Median : 88.00   Median :  25.00  
 Spain   : 6116                      Riserva     :  647   Mean   : 88.44   Mean   :  35.62  
 Portugal: 5256                      Estate Grown:  567   3rd Qu.: 91.00   3rd Qu.:  42.00  
 (Other) :19807                      (Other)     :79959   Max.   :100.00   Max.   :3300.00  
 NA's    :   59                      NA's        :34545                    NA's   :8395     
       province                     region_1                  region_2                taster_name   
 California:33656   Napa Valley         : 4174   Central Coast    :10233   NA               :24917  
 Washington: 7965   Columbia Valley (WA): 3795   Sonoma           : 8390   Roger Voss       :23560  
 Bordeaux  : 5556   Russian River Valley: 2862   Columbia Valley  : 7466   Michael Schachner:14046  
 Tuscany   : 5391   California          : 2468   Napa             : 6369   Kerin O’Keefe    : 9697  
 Oregon    : 4929   Paso Robles         : 2155   Willamette Valley: 3142   Paul Gregutt     : 8868  
 (Other)   :62432   (Other)             :84974   (Other)          :11169   Virginie Boone   : 8708  
 NA's      :   59   NA's                :19560   NA's             :73219   (Other)          :30192  
  taster_twitter_handle                                                                title       
 @vossroger  :23560     Gloria Ferrer NV Sonoma Brut Sparkling (Sonoma County)            :     9  
 @wineschach :14046     Segura Viudas NV Aria Estate Extra Dry Sparkling (Cava)           :     7  
 @kerinokeefe: 9697     Segura Viudas NV Extra Dry Sparkling (Cava)                       :     7  
 @paulgwine  : 8868     Bailly-Lapierre NV Brut  (Crémant de Bourgogne)                   :     6  
 @vboone     : 8708     Gloria Ferrer NV Blanc de Noirs Sparkling (Carneros)              :     6  
 (Other)     :25663     J Vineyards & Winery NV Brut Rosé Sparkling (Russian River Valley):     6  
 NA's        :29446     (Other)                                                           :119947  
                     variety                     winery       Scaled_Points     Percentile_Points  
 Pinot Noir              :12278   Wines & Winemakers:   211   Min.   :-4.3836   Min.   :  0.01031  
 Chardonnay              :10868   Williams Selyem   :   204   1st Qu.:-0.7342   1st Qu.: 23.96647  
 Cabernet Sauvignon      : 8840   Testarossa        :   201   Median :-0.0236   Median : 48.31980  
 Red Blend               : 8243   DFJ Vinhos        :   200   Mean   : 0.0000   Mean   : 50.00378  
 Bordeaux-style Red Blend: 6471   Louis Latour      :   192   3rd Qu.: 0.6985   3rd Qu.: 73.33108  
 (Other)                 :73287   Georges Duboeuf   :   186   Max.   : 4.3888   Max.   :100.00000  
 NA's                    :    1   (Other)           :118794                                        
 country_other                location          year     
 Length:119988      California    :33656   Min.   :1904  
 Class :character   Washington    : 7965   1st Qu.:2009  
 Mode  :character   Bordeaux      : 5556   Median :2011  
                    Tuscany       : 5391   Mean   :2011  
                    Portugal_other: 5256   3rd Qu.:2013  
                    (Other)       :62105   Max.   :2017  
                    NA's          :   59   NA's   :4285  
str(data)
Classes ‘grouped_df’, ‘tbl_df’, ‘tbl’ and 'data.frame': 119988 obs. of  18 variables:
 $ country              : Factor w/ 43 levels "Argentina","Armenia",..: 23 32 43 43 43 38 23 16 18 16 ...
 $ description          : chr  "Aromas include tropical fruit, broom, brimstone and dried herb. The palate isn't overly expressive, offering un"| __truncated__ "This is ripe and fruity, a wine that is smooth while still structured. Firm tannins are filled out with juicy r"| __truncated__ "Tart and snappy, the flavors of lime flesh and rind dominate. Some green pineapple pokes through, with crisp ac"| __truncated__ "Pineapple rind, lemon pith and orange blossom start off the aromas. The palate is a bit more opulent, with note"| __truncated__ ...
 $ designation          : Factor w/ 37979 levels "??? Vineyard",..: 36976 2352 NA 28123 36715 1996 3051 NA 30971 20046 ...
 $ points               : int  87 87 87 87 87 87 87 87 87 87 ...
 $ price                : num  NA 15 14 13 65 15 16 24 12 27 ...
 $ province             : Factor w/ 425 levels "Achaia","Aconcagua Costa",..: 334 110 269 220 269 263 334 12 310 12 ...
 $ region_1             : Factor w/ 1229 levels "Abruzzo","Adelaida District",..: 425 NA 1218 550 1218 758 1205 22 NA 22 ...
 $ region_2             : Factor w/ 17 levels "California Other",..: NA NA 17 NA 17 NA NA NA NA NA ...
 $ taster_name          : Factor w/ 20 levels "Alexander Peartree",..: 10 16 15 1 15 13 10 16 2 16 ...
 $ taster_twitter_handle: Factor w/ 15 levels "@AnneInVino",..: 5 11 8 NA 8 13 5 11 NA 11 ...
 $ title                : Factor w/ 118840 levels ":Nota Bene 2005 Una Notte Red (Washington)",..: 79669 89457 89940 101059 102995 103740 105794 108715 54638 59312 ...
 $ variety              : Factor w/ 707 levels "Abouriou","Agiorgitiko",..: 692 452 438 481 442 593 188 211 211 438 ...
 $ winery               : Factor w/ 16757 levels ":Nota Bene","1+1=3",..: 11641 12988 13054 14432 14665 14740 15046 15435 8433 9014 ...
 $ Scaled_Points        : num  -0.75 -0.562 -0.734 0.628 -0.734 ...
 $ Percentile_Points    : num  23.7 31.9 23.8 73.9 23.8 ...
 $ country_other        : chr  "Italy_other" "Portugal_other" "US_other" "US_other" ...
 $ location             : Factor w/ 62 levels "Alsace","Argentina_other",..: 47 44 41 60 41 40 47 1 23 1 ...
 $ year                 : num  2013 2011 2013 2013 2012 ...
 - attr(*, "vars")= chr "taster_name"
 - attr(*, "labels")='data.frame':  20 obs. of  1 variable:
  ..$ taster_name: chr  "Alexander Peartree" "Anna Lee C. Iijima" "Anne Krebiehl MW" "Carrie Dykes" ...
  ..- attr(*, "vars")= chr "taster_name"
  ..- attr(*, "labels")='data.frame':   20 obs. of  1 variable:
  .. ..$ taster_name: chr  "Alexander Peartree" "Anna Lee C. Iijima" "Anne Krebiehl MW" "Carrie Dykes" ...
  .. ..- attr(*, "vars")= chr "taster_name"
  .. ..- attr(*, "drop")= logi TRUE
  ..- attr(*, "indices")=List of 20
  .. ..$ : int  3 19 20 724 740 902 905 906 1202 1396 ...
  .. ..$ : int  8 15 76 85 97 100 101 102 148 156 ...
  .. ..$ : int  93 340 423 428 435 441 444 584 586 587 ...
  .. ..$ : int  1625 1631 2379 3574 6939 8574 10533 10541 15859 15873 ...
  .. ..$ : int  19635 40490 42883 57207 66923 92873
  .. ..$ : int  1627 2380 2406 19477 19483 20404 29095 30291 30298 31221 ...
  .. ..$ : int  219 351 873 1543 2480 2495 2740 2741 2742 3550 ...
  .. ..$ : int  68 199 424 433 519 558 561 571 579 588 ...
  .. ..$ : int  77 83 123 136 174 191 209 211 238 293 ...
  .. ..$ : int  0 6 13 22 24 26 27 28 61 72 ...
  .. ..$ : int  197 208 210 225 226 256 257 258 264 265 ...
  .. ..$ : int  14 23 64 108 114 115 116 117 145 146 ...
  .. ..$ : int  5 16 17 18 36 44 51 58 80 81 ...
  .. ..$ : int  202 215 913 1023 1187 3336 3578 4038 4229 4709 ...
  .. ..$ : int  2 4 21 35 41 78 173 233 248 251 ...
  .. ..$ : int  1 7 9 11 30 42 49 53 63 65 ...
  .. ..$ : int  59 62 67 70 86 94 421 422 429 432 ...
  .. ..$ : int  230 271 308 325 409 410 415 596 984 1063 ...
  .. ..$ : int  10 12 25 29 56 60 71 73 74 75 ...
  .. ..$ : int  31 32 33 34 37 38 39 40 43 45 ...
  ..- attr(*, "drop")= logi TRUE
  ..- attr(*, "group_sizes")= int  383 4017 3290 129 6 24 436 3766 4766 9697 ...
  ..- attr(*, "biggest_group_size")= int 24917
 - attr(*, "indices")=List of 20
  ..$ : int  3 19 20 724 740 902 905 906 1202 1396 ...
  ..$ : int  8 15 76 85 97 100 101 102 148 156 ...
  ..$ : int  93 340 423 428 435 441 444 584 586 587 ...
  ..$ : int  1625 1631 2379 3574 6939 8574 10533 10541 15859 15873 ...
  ..$ : int  19635 40490 42883 57207 66923 92873
  ..$ : int  1627 2380 2406 19477 19483 20404 29095 30291 30298 31221 ...
  ..$ : int  219 351 873 1543 2480 2495 2740 2741 2742 3550 ...
  ..$ : int  68 199 424 433 519 558 561 571 579 588 ...
  ..$ : int  77 83 123 136 174 191 209 211 238 293 ...
  ..$ : int  0 6 13 22 24 26 27 28 61 72 ...
  ..$ : int  197 208 210 225 226 256 257 258 264 265 ...
  ..$ : int  14 23 64 108 114 115 116 117 145 146 ...
  ..$ : int  5 16 17 18 36 44 51 58 80 81 ...
  ..$ : int  202 215 913 1023 1187 3336 3578 4038 4229 4709 ...
  ..$ : int  2 4 21 35 41 78 173 233 248 251 ...
  ..$ : int  1 7 9 11 30 42 49 53 63 65 ...
  ..$ : int  59 62 67 70 86 94 421 422 429 432 ...
  ..$ : int  230 271 308 325 409 410 415 596 984 1063 ...
  ..$ : int  10 12 25 29 56 60 71 73 74 75 ...
  ..$ : int  31 32 33 34 37 38 39 40 43 45 ...
 - attr(*, "drop")= logi TRUE
 - attr(*, "group_sizes")= int  383 4017 3290 129 6 24 436 3766 4766 9697 ...
 - attr(*, "biggest_group_size")= int 24917
data <- data %>% select(-country_other, -taster_twitter_handle, -description, -winery, -designation)
# split train test
set.seed(2018)
train.index <- sample(2/3 * nrow(data))
train <- data[train.index,]
test <- data[-train.index,]
dim(train)
[1] 79992    13
summary(train)
     country          points           price              province                     region_1    
 US      :33364   Min.   : 80.00   Min.   :   4.0   California:22172   Napa Valley         : 2733  
 France  :13682   1st Qu.: 86.00   1st Qu.:  17.0   Washington: 5244   Columbia Valley (WA): 2495  
 Italy   :12151   Median : 88.00   Median :  25.0   Bordeaux  : 3777   Russian River Valley: 1881  
 Spain   : 4121   Mean   : 88.44   Mean   :  35.4   Tuscany   : 3644   California          : 1614  
 Portugal: 3597   3rd Qu.: 91.00   3rd Qu.:  42.0   Oregon    : 3347   Willamette Valley   : 1459  
 (Other) :13039   Max.   :100.00   Max.   :3300.0   (Other)   :41770   (Other)             :56775  
 NA's    :   38                    NA's   :5641     NA's      :   38   NA's                :13035  
              region_2                taster_name   
 Central Coast    : 6734   NA               :16586  
 Sonoma           : 5532   Roger Voss       :15883  
 Columbia Valley  : 4904   Michael Schachner: 9377  
 Napa             : 4163   Kerin O’Keefe    : 6527  
 Willamette Valley: 2137   Paul Gregutt     : 5984  
 (Other)          : 7403   Virginie Boone   : 5717  
 NA's             :49119   (Other)          :19918  
                                                     title                           variety     
 Korbel NV Brut Sparkling (California)                  :    6   Pinot Noir              : 8080  
 Segura Viudas NV Aria Estate Extra Dry Sparkling (Cava):    6   Chardonnay              : 7190  
 Segura Viudas NV Extra Dry Sparkling (Cava)            :    6   Cabernet Sauvignon      : 5848  
 Bailly-Lapierre NV Brut  (Crémant de Bourgogne)        :    5   Red Blend               : 5577  
 Gloria Ferrer NV Sonoma Brut Sparkling (Sonoma County) :    5   Bordeaux-style Red Blend: 4276  
 Jacquart NV Brut Mosaïque  (Champagne)                 :    5   Riesling                : 3158  
 (Other)                                                :79959   (Other)                 :45863  
 Scaled_Points       Percentile_Points             location          year     
 Min.   :-4.383631   Min.   :  0.01031   California    :22172   Min.   :1904  
 1st Qu.:-0.726995   1st Qu.: 23.96647   Washington    : 5244   1st Qu.:2008  
 Median :-0.023602   Median : 48.31980   Bordeaux      : 3777   Median :2011  
 Mean   :-0.001477   Mean   : 49.93261   Tuscany       : 3644   Mean   :2011  
 3rd Qu.: 0.698512   3rd Qu.: 73.33108   Portugal_other: 3597   3rd Qu.:2013  
 Max.   : 4.388765   Max.   :100.00000   (Other)       :41520   Max.   :2017  
                                         NA's          :   38   NA's   :2909  
dim(test)
[1] 39996    13
summary(test)
     country          points           price               province                     region_1    
 US      :17093   Min.   : 80.00   Min.   :   4.00   California:11484   Napa Valley         : 1441  
 France  : 6671   1st Qu.: 86.00   1st Qu.:  17.00   Washington: 2721   Columbia Valley (WA): 1300  
 Italy   : 5789   Median : 88.00   Median :  25.00   Bordeaux  : 1779   Russian River Valley:  981  
 Spain   : 1995   Mean   : 88.44   Mean   :  36.07   Tuscany   : 1747   California          :  854  
 Portugal: 1659   3rd Qu.: 91.00   3rd Qu.:  44.00   Oregon    : 1582   Mendoza             :  749  
 (Other) : 6768   Max.   :100.00   Max.   :2500.00   (Other)   :20662   (Other)             :28146  
 NA's    :   21                    NA's   :2754      NA's      :   21   NA's                : 6525  
              region_2                taster_name   
 Central Coast    : 3499   NA               : 8331  
 Sonoma           : 2858   Roger Voss       : 7677  
 Columbia Valley  : 2562   Michael Schachner: 4669  
 Napa             : 2206   Kerin O’Keefe    : 3170  
 Willamette Valley: 1005   Virginie Boone   : 2991  
 (Other)          : 3766   Paul Gregutt     : 2884  
 NA's             :24100   (Other)          :10274  
                                                    title                           variety     
 Gloria Ferrer NV Blanc de Noirs Sparkling (Carneros)  :    4   Pinot Noir              : 4198  
 Gloria Ferrer NV Sonoma Brut Sparkling (Sonoma County):    4   Chardonnay              : 3678  
 Ruinart NV Blanc de Blancs Brut Chardonnay (Champagne):    4   Cabernet Sauvignon      : 2992  
 A.R. Lenoble  NV Terroirs Brut Rosé  (Champagne)      :    3   Red Blend               : 2666  
 Canard-Duchêne NV Cuvée Léonie Brut  (Champagne)      :    3   Bordeaux-style Red Blend: 2195  
 Freixenet NV Carta Nevada Brut Sparkling (Cava)       :    3   (Other)                 :24266  
 (Other)                                               :39975   NA's                    :    1  
 Scaled_Points       Percentile_Points             location          year     
 Min.   :-4.383631   Min.   :  0.01745   California    :11484   Min.   :1927  
 1st Qu.:-0.734211   1st Qu.: 23.96647   Washington    : 2721   1st Qu.:2009  
 Median : 0.040399   Median : 53.38887   Bordeaux      : 1779   Median :2011  
 Mean   : 0.002954   Mean   : 50.14612   Tuscany       : 1747   Mean   :2011  
 3rd Qu.: 0.735078   3rd Qu.: 74.94907   Portugal_other: 1659   3rd Qu.:2013  
 Max.   : 3.993459   Max.   :100.00000   (Other)       :20585   Max.   :2017  
                                         NA's          :   21   NA's   :1376  
# clean train
clean_train <- train[lapply(train, function(x) sum(is.na(x)) / length(x))  < 0.1]
clean_train <- clean(clean_train)
clean_train$`US_vs_non-US` <- factor(ifelse(clean_train$country == 'US', 'US', 'non-US'))
clean_train[is.na(clean_train$country), 'US_vs_non-US'] <- NA
clean_train$`US_vs_non-US` <- addNA(clean_train$`US_vs_non-US`)
#clean_train <- clean_train %>% select(-title, -country)
clean_train <- clean_train %>% select(-Scaled_Points, -Percentile_Points)
dim(clean_train)
[1] 77038    10
ggplot(aes(x=points, y=price, col = taster_name), data = clean_train) + geom_jitter()

ggplot(aes(x=points, y=price, col = `US_vs_non-US`), data = clean_train) + geom_jitter() + theme(legend.position = "right")

ggplot(aes(x=points, y=price, col = variety), data = clean_train) + geom_jitter() + theme(legend.position = 'none')

ggplot(aes(x=year, y=price, col=`US_vs_non-US`), data = clean_train) + geom_jitter()

ggplot(aes(x=year, y=points, col=`US_vs_non-US`), data = clean_train) + geom_jitter()

# gam only: points and price
set.seed(2018)
k <- 10
sp <- split(c(1:nrow(train)), c(1:k))
data length is not a multiple of split variable
price_pt_gam_error <- matrix(NA, nrow=k, ncol=2)
for(i in 1:k){
    cleanwine_train <- train[-sp[[i]], ]
    cleanwine_test <- train[sp[[i]], ]
    
    # data cleaning
    cleanwine_train <- cleanwine_train[lapply(cleanwine_train, function(x) sum(is.na(x)) / length(x))  < 0.1]
    cleanwine_train <- clean(cleanwine_train)
    cleanwine_train <- cleanwine_train %>% select(-title, -country)
    #print(colnames(cleanwine_train))
    #print(head(cleanwine_train))
    
    cleanwine_test <- cleanwine_test[lapply(cleanwine_test, function(x) sum(is.na(x)) / length(x))  < 0.1]
    cleanwine_test <- clean(cleanwine_test)
    cleanwine_test <- cleanwine_test %>% select(-title, -country)
    #print(colnames(cleanwine_test))
    #print(head(cleanwine_test))
    
    # select only Percentile_Points and Price for gam
    #cleanwine_train_gam <- cleanwine_train %>% select(Percentile_Points, price)
    #cleanwine_test_gam <- cleanwine_test %>% select(Percentile_Points, price)   
    
    # gam
    price_gam <- gam(price ~ s(Percentile_Points), data = cleanwine_train)
    pt_gam <- gam(Percentile_Points ~ s(price), data = cleanwine_train)
    price_pred <- predict(price_gam, cleanwine_test)
    pt_pred <- predict(pt_gam, cleanwine_test)
    price_pt_gam_error[i,1] <- mean(abs(price_pred- cleanwine_test$price))
    price_pt_gam_error[i,2] <- mean((pt_pred - cleanwine_test$Percentile_Points)^2)
}
price_pt_gam_error
          [,1]     [,2]
 [1,] 16.44460 589.3486
 [2,] 17.22497 588.3934
 [3,] 16.22723 593.4306
 [4,] 15.96160 589.3421
 [5,] 15.91975 601.6767
 [6,] 16.13594 575.2521
 [7,] 16.11612 586.4804
 [8,] 16.03400 592.9562
 [9,] 15.83370 580.9728
[10,] 16.39469 575.7507
# CV on train
set.seed(2018)
k <- 10
sp <- createFolds(train$variety, k)
price_fold_error <- matrix(NA, nrow=k, ncol=5)
pt_fold_error <- matrix(NA, nrow=k, ncol=5)
for(i in 1:k){
    cleanwine_train <- train[-sp[[k]], ]
    cleanwine_test <- train[sp[[k]], ]
    
    # data cleaning
    cleanwine_train <- cleanwine_train[lapply(cleanwine_train, function(x) sum(is.na(x)) / length(x))  < 0.1]
    cleanwine_train <- clean(cleanwine_train)
    cleanwine_train <- cleanwine_train %>% select(-title, -country)
    #print(colnames(cleanwine_train))
    
    cleanwine_test <- cleanwine_test[lapply(cleanwine_test, function(x) sum(is.na(x)) / length(x))  < 0.1]
    cleanwine_test <- clean(cleanwine_test)
    cleanwine_test <- cleanwine_test %>% select(-title, -country)
    #print(colnames(cleanwine_test))
    
    cleanwine_train <- cleanwine_train %>% select(-Scaled_Points, -Percentile_Points)
    train.data.price <- cleanwine_train %>% select(-price)
    #print(colnames(train.data.price))
    train.data.price <- sparse.model.matrix(~., train.data.price)[,-1]
    train.data.pt <- cleanwine_train %>% select(-points)
    train.data.pt <- sparse.model.matrix(~., train.data.pt)[,-1]
    
    cleanwine_test <- cleanwine_test %>% select(-Scaled_Points, -Percentile_Points)
    test.data.price <- cleanwine_test %>% select(-price)
    #print(colnames(test.data.price))
    test.data.price <- sparse.model.matrix(~., test.data.price)[,-1]
    #print(colnames(test.data.price))
    test.data.pt <- cleanwine_test %>% select(-points)
    test.data.pt <- sparse.model.matrix(~., test.data.pt)[,-1]
    
    # random forest
    subsamps <- seq(0.1, 1, 0.1)
    train_error_price <- vector("numeric", length(seq(0.1, 1, 0.1)))
    train_error_pt <- vector("numeric", length(seq(0.1, 1, 0.1)))
    
    for (j in 1:length(seq(0.1, 1, 0.1))) {
      rf_train_price <- xgboost(data = train.data.price, label=cleanwine_train$price, max_depth = 0, verbose = 0, num_parallel_tree = 1000, subsample = subsamps[i], nrounds = 1, colsample_bylevel=0.6, objective = "reg:linear", eval_metric = 'mae')
      rf_train_pt <- xgboost(data = train.data.pt, label=cleanwine_train$points, max_depth = 0, verbose = 0, num_parallel_tree = 1000, subsample = subsamps[i], nrounds = 1, colsample_bylevel=0.6, objective = "reg:linear", eval_metric = 'rmse')
      train_error_price[j] <- as.numeric(rf_train_price$evaluation_log[,2])
      train_error_pt[j] <- as.numeric(rf_train_pt$evaluation_log[,2])
    }
    index.min.price <- which.min(train_error_price)
    index.min.pt <- which.min(train_error_pt)
    
    rf_cleanwine_price <- xgboost(data = train.data.price, label=cleanwine_train$price, max_depth = 0, verbose = 0, num_parallel_tree = 1000, subsample = subsamps[index.min.price], nrounds = 1, colsample_bylevel=0.6, objective = "reg:linear", eval_metric = 'mae')
    #print(rf_cleanwine_price$feature_names)
    rf_cleanwine_pt <- xgboost(data = train.data.pt, label=cleanwine_train$points, max_depth = 0, verbose = 0, num_parallel_tree = 1000, subsample = subsamps[index.min.pt], nrounds = 1, colsample_bylevel=0.6, objective = "reg:linear", eval_metric = 'rmse')
    print(length(rf_cleanwine_pt$feature_names) == length(colnames(test.data.price)))
    test.pred.price <- predict(rf_cleanwine_price, test.data.price)
    price_fold_error[i, 1] <- mean(abs(cleanwine_test$price - test.pred.price))
    test.pred.pt <- predict(rf_cleanwine_pt, test.data.pt)                               
    pt_fold_error[i,1] <- mean((cleanwine_test$price - test.pred.pt)^2)
    
    # svm
    svm_cleanwine_price <- svm(price~., cleanwine_train)
    svm_cleanwine_pt <- svm(points~., cleanwine_train)
    result_price <- test(svm_cleanwine_price, cleanwine_test)
    price_fold_error[i,2] <- mean(abs(cleanwine_test$price - result_price))
    result_pt <- test(svm_cleanwine_pt, cleanwine_test)
    pt_fold_error[i, 2] <- mean((cleanwine_test$points - result_pt)^2)
    
    # gbm
    gbmFit_price <- gbm(formula = price ~ ., data = cleanwine_train, 
                  n.trees = 1000, shrinkage = 0.05, interaction.depth = 2, cv.folds = 10, 
                  distribution = "laplace", verbose = FALSE)
    best_iter_price <- gbm.perf(gbmFit_price, method = "cv", plot.it = F)
    gbm_price_pred <- predict(gbmFit_price, newdata = cleanwine_test, n.trees = best_iter_price)
    price_fold_error[i,3] <- mean(abs(cleanwine_test$price - gbm_price_pred))
    
    gbmFit_pt <- gbm(formula = points ~ ., data = cleanwine_train, 
                  n.trees = 1000, shrinkage = 0.05, interaction.depth = 2, cv.folds = 10, 
                  distribution = "laplace", verbose = FALSE) 
    best_iter_pt <- gbm.perf(gbmFit_pt, method = "cv", plot.it = F)
    gbm_pt_pred <- predict(gbmFit_pt, newdata = cleanwine_test, n.trees = best_iter_pt)
    pt_fold_error[i,3] <- mean((cleanwine_test$points - gbm_pt_pred)^2)
    
    # glmnet
    # trn.mtx <- model.matrix(~.,cleanwine_train)
    # trn.smtx <- Matrix(trn.mtx,sparse=T)[,-1]
    # 
    # tst.mtx <- model.matrix(~.,cleanwine_test)
    # tst.smtx <- Matrix(tst.mtx,sparse=T)[,-1]
    
    fit.lasso.price <- cv.glmnet(x=train.data.price,cleanwine_train$price,alpha = 1,type.measure = "mae") 
    l.err.price <- predict(fit.lasso.price,newx = test.data.price,type = 'response')
    price_fold_error[i,4] <- mean(abs(cleanwine_test$price - l.err.price))
 
    fit.ridge.price <- cv.glmnet(x=train.data.price,cleanwine_train$price,alpha=0,type.measure = "mae")
    r.err.price <- predict(fit.ridge.price,newx = test.data.price, type = 'response')
    price_fold_error[i,5] <- mean(abs(cleanwine_test$price - r.err.price))
    
    fit.lasso.pt <- cv.glmnet(x=train.data.pt,cleanwine_train$points,alpha = 1,type.measure = "mse") 
    l.err.pt <- predict(fit.lasso.pt,newx = test.data.pt,type = 'response')
    pt_fold_error[i,4] <- mean((cleanwine_test$points - l.err.pt)^2)
 
    fit.ridge.pt <- cv.glmnet(x=train.data.pt,cleanwine_train$points,alpha=0,type.measure = "mse")
    r.err.pt <- predict(fit.ridge.pt,newx = test.data.pt, type = 'response')
    pt_fold_error[i,5] <- mean((cleanwine_test$points - r.err.pt)^2)
}
[1] TRUE

[1] TRUE

[1] TRUE

[1] TRUE

[1] TRUE

[1] TRUE

[1] TRUE

[1] TRUE

[1] TRUE

[1] TRUE

colnames(price_fold_error) <- c("random_forest", "svm", "gbm", "lasso", "ridge")
colnames(pt_fold_error) <- c("random_forest", "svm", "gbm", "lasso", "ridge")
price_fold_error
      random_forest      svm      gbm    lasso    ridge
 [1,]      24.07756 12.65667 15.63732 14.77860 14.00466
 [2,]      24.08077 12.71607 15.56506 14.78930 14.00466
 [3,]      24.07985 12.62514 15.73344 14.80146 14.00466
 [4,]      24.07889 12.87436 15.70824 14.77860 14.04065
 [5,]      24.08126 12.53914 15.60783 14.77860 14.00466
 [6,]      24.07933 12.70176 15.79083 14.77860 14.00466
 [7,]      24.07805 12.80975 15.65192 14.76829 14.04065
 [8,]      24.07805 12.77349 15.67500 14.81334 13.97669
 [9,]      24.08122 12.82400 15.71277 14.78930 14.00466
[10,]      24.07889 12.66297 15.64105 14.78930 14.00466
pt_fold_error
      random_forest      svm      gbm    lasso    ridge
 [1,]      1254.200 5.070732 7.089304 6.659461 6.693300
 [2,]      1254.195 5.056448 7.097442 6.610804 6.693300
 [3,]      1254.215 5.036886 7.176921 6.582427 6.746977
 [4,]      1254.203 5.054266 7.278442 6.690893 6.719021
 [5,]      1254.229 5.033564 7.269607 6.643468 6.693300
 [6,]      1254.256 5.049380 7.404647 6.596118 6.669693
 [7,]      1254.204 5.045616 7.227224 6.596118 6.693300
 [8,]      1254.198 5.058707 7.174839 6.627099 6.719021
 [9,]      1254.213 5.045101 7.244036 6.582427 6.693300
[10,]      1254.260 5.042953 7.129418 6.582427 6.693300
result_test_price <- predict(svm_test_price, clean_test)
mean(abs(clean_test$price - result_test_price))
[1] 13.78876
result_test_pt <- predict(svm_test_pt, clean_test)
mean((clean_test$points - result_test_pt)^2)
[1] 5.553673
clean_test$country <- clean_test_country
clean_test$province <- clean_test_province
clean_test$price_pred <- result_test_price
clean_test$point_pred <- result_test_pt
write_csv(clean_test, "clean_test.csv")
price_pt_df <- data.frame(cbind(clean_test$points, clean_test$price, result_test_price, result_test_pt))
colnames(price_pt_df) <- c("points", "price", "pred_price", "pred_point")
ggplot(aes(x=points, y=price), data = price_pt_df) + geom_jitter() + geom_point(aes(x=points, y=pred_price), col="red")

ggplot(aes(x=price, y=points), data = price_pt_df) + geom_jitter() + geom_point(aes(x=price, y=pred_point), col="red")

price_pt_pred <- data.frame(cbind(result_test_price, result_test_pt))
colnames(price_pt_pred) <- c("price_pred", "point_pred")
clean_test_pred <- merge(clean_test, price_pt_pred)
# test
cleanwine <- data
cleanwine$location <- factor(cleanwine$location)
cleanwine$taster_name <- factor(cleanwine$taster_name)
cleanwine$title <- factor(cleanwine$title)
cleanwine$variety <- factor(cleanwine$variety)
cleanwine$taster_name <- addNA(cleanwine$taster_name)
cleanwine$region_1 <- factor(cleanwine$region_1)
cleanwine$region_2 <- factor(cleanwine$region_2)

cleanwine <- cleanwine[lapply(cleanwine, function(x) sum(is.na(x)) / length(x) ) < 0.1]
cleanwine <- clean(cleanwine)
cleanwine <- cleanwine %>% select(-title, -country)
summary(cleanwine)

tmp = data #%>% select(-Scaled_Points, -Percentile_Points)

train.idx <- sample(nrow(tmp), 4/5 * nrow(tmp))
cleanwine_train <- tmp[train.idx, ]
cleanwine_test <- tmp[-train.idx, ]

cleanwine_train$location <- factor(cleanwine_train$location)
    cleanwine_train$taster_name <- factor(cleanwine_train$taster_name)
    cleanwine_train$taster_name <- addNA(cleanwine_train$taster_name)
    cleanwine_train$title <- factor(cleanwine_train$title)
    cleanwine_train$variety <- factor(cleanwine_train$variety)
    cleanwine_train$region_1 <- factor(cleanwine_train$region_1)
    cleanwine_train$region_2 <- factor(cleanwine_train$region_2)
    
    cleanwine_test$location <- factor(cleanwine_test$location)
    cleanwine_test$taster_name <- factor(cleanwine_test$taster_name)
    cleanwine_test$taster_name <- addNA(cleanwine_test$taster_name)
    cleanwine_test$title <- factor(cleanwine_test$title)
    cleanwine_test$variety <- factor(cleanwine_test$variety)
    cleanwine_test$region_1 <- factor(cleanwine_test$region_1)
    cleanwine_test$region_2 <- factor(cleanwine_test$region_2)

    cleanwine_train <- cleanwine_train[lapply(cleanwine_train, function(x) sum(is.na(x)) / length(x) ) < 0.1]
    cleanwine_train <- clean(cleanwine_train)
    cleanwine_train <- cleanwine_train %>% select(-title, -country)
    print(colnames(cleanwine_train))
    
    cleanwine_test <- cleanwine_test[lapply(cleanwine_test, function(x) sum(is.na(x)) / length(x) ) < 0.1]
    cleanwine_test <- clean(cleanwine_test)
    cleanwine_test <- cleanwine_test %>% select(-title, -country)
    print(colnames(cleanwine_test))
    
    cleanwine_train <- cleanwine_train %>% select(-Scaled_Points, -Percentile_Points)
    train.data.price <- cleanwine_train %>% select(-price)
    print(colnames(train.data.price))
    train.data.price <- sparse.model.matrix(~., train.data.price)[,-1]
    train.data.pt <- cleanwine_train %>% select(-points)
    train.data.pt <- sparse.model.matrix(~., train.data.pt)[,-1]
    
    cleanwine_test <- cleanwine_test %>% select(-Scaled_Points, -Percentile_Points)
    test.data.price <- cleanwine_test %>% select(-price)
    print(colnames(test.data.price))
    test.data.price <- sparse.model.matrix(~., test.data.price)[,-1]
    print(colnames(test.data.price))
    test.data.pt <- cleanwine_test %>% select(-points)
    test.data.pt <- sparse.model.matrix(~., test.data.pt)[,-1]
# tmp = cleanwine %>% select(-Scaled_Points, -Percentile_Points)
# 
# train.idx <- sample(nrow(tmp), 4/5 * nrow(tmp))
# tmp.train <- tmp[train.idx, ]
# tmp.test <- tmp[-train.idx, ]
# train.data.price <- tmp.train %>% select(-price)
# train.data.price <- sparse.model.matrix(~., train.data.price)[,-1]
# print(colnames(train.data.price))
# train.data.pt <- tmp.train %>% select(-points)
# train.data.pt <- sparse.model.matrix(~., train.data.pt)[,-1]
# test.data.price <- tmp.test %>% select(-price)
# test.data.price <- sparse.model.matrix(~., test.data.price)[,-1]
# print(colnames(test.data.price))
# test.data.pt <- tmp.test %>% select(-points)
# test.data.pt <- sparse.model.matrix(~., test.data.pt)[,-1]

subsamps <- seq(0.1, 1, 0.1)
colsamps <- seq(0.1, 1, 0.1)
#train_error <- matrix(NA, nrow = length(seq(0.1, 1, 0.1)), ncol = length(seq(0.1, 1, 0.1)))
train_error_price <- vector("numeric", length(seq(0.1, 1, 0.1)))
train_error_pt <- vector("numeric", length(seq(0.1, 1, 0.1)))
for (i in 1:length(seq(0.1, 1, 0.1))) {
  #for (j in 1:length(seq(0.1, 1, 0.1))) {
    rf_cleanwine_price <- xgboost(data = train.data.price, label=cleanwine_train$price, verbose = 0, max_depth = 5, num_parallel_tree = 1000, subsample = subsamps[i], nrounds = 1, colsample_bylevel=0.6, objective = "reg:linear", eval_metric = 'mae')
    rf_cleanwine_pt <- xgboost(data = train.data.pt, label=cleanwine_train$points, verbose = 0, max_depth = 5, num_parallel_tree = 1000, subsample = subsamps[i], nrounds = 1, colsample_bylevel=0.6, objective = "reg:linear", eval_metric = 'rmse')
    #train_error[i,j] <- as.numeric(rf_cleanwine$evaluation_log[,2])
    train_error_price[i] <- as.numeric(rf_cleanwine_price$evaluation_log[,2])
    train_error_pt[i] <- as.numeric(rf_cleanwine_pt$evaluation_log[,2])
  #}
}
min.index.price <- which(train_error_price == min(train_error_price), arr.ind = TRUE)
min.index.pt <- which(train_error_pt == min(train_error_pt), arr.ind = TRUE)
rf_cleanwine_price <- xgboost(data = train.data.price, label=cleanwine_train$price, max_depth = 0, verbose = 0, num_parallel_tree = 1000, subsample = subsamps[min.index.price], nrounds = 1, colsample_bylevel=0.6, objective = "reg:linear", eval_metric = 'mae')
print(rf_cleanwine_price$feature_names)
rf_cleanwine_pt <- xgboost(data = train.data.pt, label=cleanwine_train$points, max_depth = 0, verbose = 0, num_parallel_tree = 1000, subsample = subsamps[min.index.pt], nrounds = 1, colsample_bylevel=0.6, objective = "reg:linear", eval_metric = 'rmse')

test.pred.price <- predict(rf_cleanwine_price, test.data.price)
mean(abs(test.pred.price - tmp.test$price))

test.pred.pt <- predict(rf_cleanwine_pt, test.data.pt)
mean((test.pred.pt - tmp.test$points)^2)
which(train_error == min(train_error), arr.ind = TRUE)
#gammalist <- c(0.005,0.01,0.015,0.02,0.025,0.03,0.035,0.04,0.045,0.05)
#tune.out <- tune.svm(tmp.data, tmp$price, 
                 #kernel='linear', cost=2^(-1:5), gamma = gammalist)
#summary(tune.out)

svm_cleanwine_price <- svm(price~., tmp.train)
svm_cleanwine_price

svm_cleanwine_pt <- svm(points~., tmp.train)
svm_cleanwine_pt
result_price <- test(svm_cleanwine_price, tmp.test)
mean(abs(tmp$price - result_price))

result_pt <- test(svm_cleanwine_pt, tmp.test)
mean((tmp$points - result_pt)^2)
## Split in training and test data
train.idx <- sample(nrow(iris), 2/3 * nrow(iris))
iris.train <- iris[train.idx, ]
iris.test <- iris[-train.idx, ]

## Run case-specific RF
csrf(Species ~ ., training_data = iris.train, test_data = iris.test, 
     params1 = list(num.trees = 50, mtry = 4), 
     params2 = list(num.trees = 5))
#idx = sample(1:900)
tmp_train = tmp[1:900, ]
tmp_test  = tmp[901:1000, ]

rf_cleanwine <- csrf(sparse.model.matrix(tmp_train %>% select(-price)), tmp_train$price, 
                     params1 = list(num.trees = 500, mtry = 4), 
                     params2 = list(num.trees = 50, mtry = 4))
rf_cleanwine <- csrf(price ~ ., cleanwine %>% select(-Scaled_Points), params1 = list(num.trees = 50, mtry = 4))
# gbm
gbmFit <- gbm(formula = price ~ ., data = cleanwine_price_10perc %>% select(-title, -winery), 
              n.trees = 1000, shrinkage = 0.01, interaction.depth = 2, cv.folds = 10, 
              distribution = "gaussian")

best_iter <- gbm.perf(gbmFit, method = "cv")

# Performance on whole dataset

diff_squared <- (cleanwine_price_10perc$price - 
    predict(gbmFit, newdata = cleanwine_price_10perc, n.trees = best_iter))^2

mean(diff_squared, na.rm = TRUE)
# ranger
csrf_wine <- csrf(formula = price ~ ., training_data = cleanwine_price_train, test_data = cleanwine_price_test, params1 = list(num.trees = 50, mtry = 4), params2 = list(num.trees = 5))
csrf_wine
# random forest
dat.cleanwine <- sparse.model.matrix(~., data = cleanwine_price_10perc %>% select(-title, -winery))[,-1]
price <- cleanwine_price_10perc$price

rf_wine <- randomForest(as.matrix(dat.cleanwine), price, do.trace = F, 
                          importance = T) 
rf_wine
#oob_wine <- mean(price - rf_wine$votes[,2])^2)
LS0tCnRpdGxlOiAiNzA3V2luZV9CZW5qaV9ZTHRyeSIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQpgYGB7cn0KbGlicmFyeShyZWFkcikKbGlicmFyeShkcGx5cikKbGlicmFyeSh0aWR5cikKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KGdsbW5ldCkKbGlicmFyeShnYm0pCmxpYnJhcnkoZ2FtKQpsaWJyYXJ5KHN0cmluZ3IpCmxpYnJhcnkoeGdib29zdCkKbGlicmFyeShjYXJldCkKbGlicmFyeShNYXRyaXgpCmxpYnJhcnkoZTEwNzEpCmxpYnJhcnkobGlxdWlkU1ZNKQpgYGAKCmBgYHtyfQojIHJlYWQgZGF0YQpyZWFkX2NzdigiRGF0YS93aW5lLXJldmlld3Mvd2luZW1hZy1kYXRhLTEzMGstdjIuY3N2IikgJT4lIHNlbGVjdCgtWDEpICU+JSB1bmlxdWUgLT4gZGF0YQpgYGAKCmBgYHtyfQojIHByZXByb2Nlc3NpbmcKc2NhbGVfdGFzdGVyIDwtIGZ1bmN0aW9uKHBvaW50cyl7CiAgICAjIHRha2VzIGEgdmVjdG9yIG9mIG51bWJlcnMsIHN1YnRyYWN0cyBldmVyeSBlbGVtZW50IGJ5IHRoZSBtZWFuIG9mIHRoZSB2ZWN0b3IsIGFuZCB0aGVuCiAgICAjIGRpdmlkZXMgZXZlcnkgZWxlbWVudCBieSB0aGUgc3RhbmRhcmQgZGV2aWF0aW9uIG9mIHRoZSB2ZWN0b3IKICAgIAogICAgcmV0dXJuKChwb2ludHMgLSBtZWFuKHBvaW50cywgbmEucm0gPSBUUlVFKSkgLyBzZChwb2ludHMsIG5hLnJtID0gVFJVRSkpCn0KCnBlcmNlbnRpbGVfdGFzdGVyIDwtIGZ1bmN0aW9uKHgpewogICAgIyB0YWtlcyBhIHZlY3RvciBvZiBudW1iZXJzLCByYW5rcyBldmVyeSBlbGVtZW50IGFuZCBkaXZpZGVzIGJ5IG4sIGdpdmluZyB0aGUgcGVyY2VudGlsZSBvZiBlYWNoIGVsZW1lbnQKICAgIHRydW5jKHJhbmsoeCkpL2xlbmd0aCh4KSAqIDEwMAp9CgpkYXRhIDwtIGRhdGEgJT4lIGdyb3VwX2J5KHRhc3Rlcl9uYW1lKSAlPiUgbXV0YXRlKCJTY2FsZWRfUG9pbnRzIiA9IHNjYWxlX3Rhc3Rlcihwb2ludHMpKQoKZGF0YSA8LSBkYXRhICU+JSBncm91cF9ieSh0YXN0ZXJfbmFtZSkgJT4lIG11dGF0ZSgiUGVyY2VudGlsZV9Qb2ludHMiID0gcGVyY2VudGlsZV90YXN0ZXIocG9pbnRzKSkKCnRhYiA8LSBkYXRhICU+JSBncm91cF9ieShwcm92aW5jZSkgJT4lIHN1bW1hcml6ZSgiUHJvcG9ydGlvbiIgPSBuKCkvbnJvdyhkYXRhKSkKdGFiIDwtIHRhYlt0YWIkUHJvcG9ydGlvbiA+IDAuMDEsIF0KCnRhYmNvdW50cnkgPC0gIGRhdGEgJT4lIGdyb3VwX2J5KGNvdW50cnkpICU+JSBzdW1tYXJpemUoIlByb3BvcnRpb24iID0gbigpL25yb3coZGF0YSkpCnRhYmNvdW50cnkgPC0gdGFiY291bnRyeVt0YWJjb3VudHJ5JFByb3BvcnRpb24gPiAwLjAxLCBdCgpkYXRhJGNvdW50cnlfb3RoZXIgPC0gaWZlbHNlKGRhdGEkY291bnRyeSAlaW4lIHRhYmNvdW50cnkkY291bnRyeSwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgcGFzdGUwKGRhdGEkY291bnRyeSwgIl9vdGhlciIpLCBkYXRhJGNvdW50cnkpCmRhdGEkbG9jYXRpb24gPC0gaWZlbHNlKGRhdGEkcHJvdmluY2UgJWluJSB0YWIkcHJvdmluY2UsIGRhdGEkcHJvdmluY2UsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBkYXRhJGNvdW50cnlfb3RoZXIpCgp5ZWFyIDwtIHN0cl9leHRyYWN0X2FsbChkYXRhJHRpdGxlLCAiWzEtMl1bMDldWzAtOV17Mn0iKQoKZGF0YSR5ZWFyIDwtIGxhcHBseSh5ZWFyLCBmdW5jdGlvbih4KXsKICAgIHggPSB4ICU+JSBhcy5udW1lcmljCiAgaWYoIWFsbChpcy5uYSh4KSkpewogICAgbmV3eCA8LSB4Wyh4ID4gMTkwMCkgJiAoeCA8IDIwMTgpXQogICAgaWYgKCFhbGwoaXMubmEobmV3eCkpKSB7CiAgICAgIG5ld3ggPC0gbWF4KG5ld3gpCiAgICAgIHJldHVybihuZXd4KQogICAgfSBlbHNlIHsKICAgICAgcmV0dXJuKE5BKQogICAgfQogIH0gZWxzZSB7CiAgICByZXR1cm4oTkEpCiAgfX0pICU+JSB1bmxpc3QKCmRhdGEkbG9jYXRpb24gPC0gZmFjdG9yKGRhdGEkbG9jYXRpb24pCmRhdGEkdGFzdGVyX25hbWUgPC0gZmFjdG9yKGRhdGEkdGFzdGVyX25hbWUpCmRhdGEkdGFzdGVyX25hbWUgPC0gYWRkTkEoZGF0YSR0YXN0ZXJfbmFtZSkKZGF0YSR0aXRsZSA8LSBmYWN0b3IoZGF0YSR0aXRsZSkKZGF0YSR2YXJpZXR5IDwtIGZhY3RvcihkYXRhJHZhcmlldHkpCmRhdGEkcmVnaW9uXzEgPC0gZmFjdG9yKGRhdGEkcmVnaW9uXzEpCmRhdGEkcmVnaW9uXzIgPC0gZmFjdG9yKGRhdGEkcmVnaW9uXzIpCmRhdGEkY291bnRyeSA8LSBmYWN0b3IoZGF0YSRjb3VudHJ5KQpkYXRhJHByb3ZpbmNlIDwtIGZhY3RvcihkYXRhJHByb3ZpbmNlKQpkYXRhJHdpbmVyeSA8LSBmYWN0b3IoZGF0YSR3aW5lcnkpCmRhdGEkdGFzdGVyX3R3aXR0ZXJfaGFuZGxlIDwtIGZhY3RvcihkYXRhJHRhc3Rlcl90d2l0dGVyX2hhbmRsZSkKZGF0YSRkZXNpZ25hdGlvbiA8LSBmYWN0b3IoZGF0YSRkZXNpZ25hdGlvbikKYGBgCgpgYGB7cn0KIyBoZWxwZXIgZnVuY3Rpb24KaW1wdXRlX21lYW4gPC0gZnVuY3Rpb24oeCkgcmVwbGFjZSh4LCBpcy5uYSh4KSwgbWVhbih4LCBuYS5ybSA9IFRSVUUpKQogICAgIyBpbXB1dGVfbWVhbiByZXBsYWNlcyBtaXNzaW5nIHZhbHVlcyB3aXRoIHRoZSBhdmVyYWdlIHZhbHVlIG9mIGEgZ3JvdXAKCmNsZWFuIDwtIGZ1bmN0aW9uKGRmKXsKICAgICMgY2xlYW4gcmVtb3ZlcyB0aGUgdmFyaWV0aWVzIHRoYXQgb25seSBoYXZlIG1pc3NpbmcgcHJpY2VzLCBhbmQgYXJlIHRodXMgdW5pbXB1dGFibGUgYnkgb3VyIHJ1bGUsCiAgICAjIGFuZCB0aGVuIGl0IGltcHV0ZXMgdGhlIHJlbWFpbmluZyBtaXNzaW5nIHByaWNlcyB1c2luZyB0aGUgYXZlcmFnZSBwcmljZSBvZiB0aGF0IHdpbmUncyB2YXJpZXR5CiAgICAKICAgIGRmICU+JSBncm91cF9ieSh2YXJpZXR5KSAlPiUgc3VtbWFyaXplKCJBdmVyYWdlX1ByaWNlIiA9IG1lYW4ocHJpY2UsIG5hLnJtID0gVCksIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIkNvdW50IiA9IG4oKSkgJT4lIAogICAgZmlsdGVyKGlzLm5hKEF2ZXJhZ2VfUHJpY2UpKSAlPiUgc2VsZWN0KHZhcmlldHkpICU+JSB1bmxpc3QoKSAtPiBkcm9wX3ZhcmlldHkgCiAgICAKICAgIGRmICU+JSBmaWx0ZXIoISh2YXJpZXR5ICVpbiUgZHJvcF92YXJpZXR5KSkgLT4gc2FtcGxlMgogICAgCiAgICBzYW1wbGUyICU+JSBncm91cF9ieSh2YXJpZXR5KSAlPiUgbXV0YXRlKHByaWNlID0gaW1wdXRlX21lYW4ocHJpY2UpKSAtPiBzYW1wbGUyCiAgICAKICAgIHNhbXBsZTIgPC0gc2FtcGxlMltjb21wbGV0ZS5jYXNlcyhzYW1wbGUyKSxdCiAgICByZXR1cm4oc2FtcGxlMikKfQpgYGAKCmBgYHtyfQojIGNoZWNrIGRhdGEKZGltKGRhdGEpCnN1bW1hcnkoZGF0YSkKc3RyKGRhdGEpCgpkYXRhIDwtIGRhdGEgJT4lIHNlbGVjdCgtY291bnRyeV9vdGhlciwgLXRhc3Rlcl90d2l0dGVyX2hhbmRsZSwgLWRlc2NyaXB0aW9uLCAtd2luZXJ5LCAtZGVzaWduYXRpb24pCmBgYAoKYGBge3J9CiMgc3BsaXQgdHJhaW4gdGVzdApzZXQuc2VlZCgyMDE4KQp0cmFpbi5pbmRleCA8LSBzYW1wbGUoMi8zICogbnJvdyhkYXRhKSkKdHJhaW4gPC0gZGF0YVt0cmFpbi5pbmRleCxdCnRlc3QgPC0gZGF0YVstdHJhaW4uaW5kZXgsXQpkaW0odHJhaW4pCnN1bW1hcnkodHJhaW4pCmRpbSh0ZXN0KQpzdW1tYXJ5KHRlc3QpCmBgYAoKCmBgYHtyfQojIGNsZWFuIHRyYWluCmNsZWFuX3RyYWluIDwtIHRyYWluW2xhcHBseSh0cmFpbiwgZnVuY3Rpb24oeCkgc3VtKGlzLm5hKHgpKSAvIGxlbmd0aCh4KSkgIDwgMC4xXQpjbGVhbl90cmFpbiA8LSBjbGVhbihjbGVhbl90cmFpbikKY2xlYW5fdHJhaW4kYFVTX3ZzX25vbi1VU2AgPC0gZmFjdG9yKGlmZWxzZShjbGVhbl90cmFpbiRjb3VudHJ5ID09ICdVUycsICdVUycsICdub24tVVMnKSkKY2xlYW5fdHJhaW5baXMubmEoY2xlYW5fdHJhaW4kY291bnRyeSksICdVU192c19ub24tVVMnXSA8LSBOQQpjbGVhbl90cmFpbiRgVVNfdnNfbm9uLVVTYCA8LSBhZGROQShjbGVhbl90cmFpbiRgVVNfdnNfbm9uLVVTYCkKI2NsZWFuX3RyYWluIDwtIGNsZWFuX3RyYWluICU+JSBzZWxlY3QoLXRpdGxlLCAtY291bnRyeSkKY2xlYW5fdHJhaW4gPC0gY2xlYW5fdHJhaW4gJT4lIHNlbGVjdCgtU2NhbGVkX1BvaW50cywgLVBlcmNlbnRpbGVfUG9pbnRzKQpkaW0oY2xlYW5fdHJhaW4pCmBgYAoKCmBgYHtyfQpnZ3Bsb3QoYWVzKHg9cG9pbnRzLCB5PXByaWNlLCBjb2wgPSB0YXN0ZXJfbmFtZSksIGRhdGEgPSBjbGVhbl90cmFpbikgKyBnZW9tX2ppdHRlcigpCmdncGxvdChhZXMoeD1wb2ludHMsIHk9cHJpY2UsIGNvbCA9IGBVU192c19ub24tVVNgKSwgZGF0YSA9IGNsZWFuX3RyYWluKSArIGdlb21faml0dGVyKCkgKyB0aGVtZShsZWdlbmQucG9zaXRpb24gPSAicmlnaHQiKQpnZ3Bsb3QoYWVzKHg9cG9pbnRzLCB5PXByaWNlLCBjb2wgPSB2YXJpZXR5KSwgZGF0YSA9IGNsZWFuX3RyYWluKSArIGdlb21faml0dGVyKCkgKyB0aGVtZShsZWdlbmQucG9zaXRpb24gPSAnbm9uZScpCmdncGxvdChhZXMoeD15ZWFyLCB5PXByaWNlLCBjb2w9YFVTX3ZzX25vbi1VU2ApLCBkYXRhID0gY2xlYW5fdHJhaW4pICsgZ2VvbV9qaXR0ZXIoKQpnZ3Bsb3QoYWVzKHg9eWVhciwgeT1wb2ludHMsIGNvbD1gVVNfdnNfbm9uLVVTYCksIGRhdGEgPSBjbGVhbl90cmFpbikgKyBnZW9tX2ppdHRlcigpCmBgYApgYGB7cn0KIyBnYW0gb25seTogcG9pbnRzIGFuZCBwcmljZQpzZXQuc2VlZCgyMDE4KQprIDwtIDEwCnNwIDwtIHNwbGl0KGMoMTpucm93KHRyYWluKSksIGMoMTprKSkKcHJpY2VfcHRfZ2FtX2Vycm9yIDwtIG1hdHJpeChOQSwgbnJvdz1rLCBuY29sPTIpCmZvcihpIGluIDE6ayl7CiAgICBjbGVhbndpbmVfdHJhaW4gPC0gdHJhaW5bLXNwW1tpXV0sIF0KICAgIGNsZWFud2luZV90ZXN0IDwtIHRyYWluW3NwW1tpXV0sIF0KICAgIAogICAgIyBkYXRhIGNsZWFuaW5nCiAgICBjbGVhbndpbmVfdHJhaW4gPC0gY2xlYW53aW5lX3RyYWluW2xhcHBseShjbGVhbndpbmVfdHJhaW4sIGZ1bmN0aW9uKHgpIHN1bShpcy5uYSh4KSkgLyBsZW5ndGgoeCkpICA8IDAuMV0KICAgIGNsZWFud2luZV90cmFpbiA8LSBjbGVhbihjbGVhbndpbmVfdHJhaW4pCiAgICBjbGVhbndpbmVfdHJhaW4gPC0gY2xlYW53aW5lX3RyYWluICU+JSBzZWxlY3QoLXRpdGxlLCAtY291bnRyeSkKICAgICNwcmludChjb2xuYW1lcyhjbGVhbndpbmVfdHJhaW4pKQogICAgI3ByaW50KGhlYWQoY2xlYW53aW5lX3RyYWluKSkKICAgIAogICAgY2xlYW53aW5lX3Rlc3QgPC0gY2xlYW53aW5lX3Rlc3RbbGFwcGx5KGNsZWFud2luZV90ZXN0LCBmdW5jdGlvbih4KSBzdW0oaXMubmEoeCkpIC8gbGVuZ3RoKHgpKSAgPCAwLjFdCiAgICBjbGVhbndpbmVfdGVzdCA8LSBjbGVhbihjbGVhbndpbmVfdGVzdCkKICAgIGNsZWFud2luZV90ZXN0IDwtIGNsZWFud2luZV90ZXN0ICU+JSBzZWxlY3QoLXRpdGxlLCAtY291bnRyeSwgLXByb3ZpbmNlKQogICAgI3ByaW50KGNvbG5hbWVzKGNsZWFud2luZV90ZXN0KSkKICAgICNwcmludChoZWFkKGNsZWFud2luZV90ZXN0KSkKICAgIAogICAgIyBzZWxlY3Qgb25seSBQZXJjZW50aWxlX1BvaW50cyBhbmQgUHJpY2UgZm9yIGdhbQogICAgI2NsZWFud2luZV90cmFpbl9nYW0gPC0gY2xlYW53aW5lX3RyYWluICU+JSBzZWxlY3QoUGVyY2VudGlsZV9Qb2ludHMsIHByaWNlKQogICAgI2NsZWFud2luZV90ZXN0X2dhbSA8LSBjbGVhbndpbmVfdGVzdCAlPiUgc2VsZWN0KFBlcmNlbnRpbGVfUG9pbnRzLCBwcmljZSkgICAKICAgIAogICAgIyBnYW0KICAgIHByaWNlX2dhbSA8LSBnYW0ocHJpY2UgfiBzKFBlcmNlbnRpbGVfUG9pbnRzKSwgZGF0YSA9IGNsZWFud2luZV90cmFpbikKICAgIHB0X2dhbSA8LSBnYW0oUGVyY2VudGlsZV9Qb2ludHMgfiBzKHByaWNlKSwgZGF0YSA9IGNsZWFud2luZV90cmFpbikKICAgIHByaWNlX3ByZWQgPC0gcHJlZGljdChwcmljZV9nYW0sIGNsZWFud2luZV90ZXN0KQogICAgcHRfcHJlZCA8LSBwcmVkaWN0KHB0X2dhbSwgY2xlYW53aW5lX3Rlc3QpCiAgICBwcmljZV9wdF9nYW1fZXJyb3JbaSwxXSA8LSBtZWFuKGFicyhwcmljZV9wcmVkLSBjbGVhbndpbmVfdGVzdCRwcmljZSkpCiAgICBwcmljZV9wdF9nYW1fZXJyb3JbaSwyXSA8LSBtZWFuKChwdF9wcmVkIC0gY2xlYW53aW5lX3Rlc3QkUGVyY2VudGlsZV9Qb2ludHMpXjIpCn0KYGBgCgpgYGB7cn0KcHJpY2VfcHRfZ2FtX2Vycm9yCmBgYAoKCmBgYHtyfQojIENWIG9uIHRyYWluCnNldC5zZWVkKDIwMTgpCmsgPC0gMTAKc3AgPC0gY3JlYXRlRm9sZHModHJhaW4kdmFyaWV0eSwgaykKcHJpY2VfZm9sZF9lcnJvciA8LSBtYXRyaXgoTkEsIG5yb3c9aywgbmNvbD01KQpwdF9mb2xkX2Vycm9yIDwtIG1hdHJpeChOQSwgbnJvdz1rLCBuY29sPTUpCmZvcihpIGluIDE6ayl7CiAgICBjbGVhbndpbmVfdHJhaW4gPC0gdHJhaW5bLXNwW1trXV0sIF0KICAgIGNsZWFud2luZV90ZXN0IDwtIHRyYWluW3NwW1trXV0sIF0KICAgIAogICAgIyBkYXRhIGNsZWFuaW5nCiAgICBjbGVhbndpbmVfdHJhaW4gPC0gY2xlYW53aW5lX3RyYWluW2xhcHBseShjbGVhbndpbmVfdHJhaW4sIGZ1bmN0aW9uKHgpIHN1bShpcy5uYSh4KSkgLyBsZW5ndGgoeCkpICA8IDAuMV0KICAgIGNsZWFud2luZV90cmFpbiA8LSBjbGVhbihjbGVhbndpbmVfdHJhaW4pCiAgICBjbGVhbndpbmVfdHJhaW4gPC0gY2xlYW53aW5lX3RyYWluICU+JSBzZWxlY3QoLXRpdGxlLCAtY291bnRyeSwgLXByb3ZpbmNlKQogICAgI3ByaW50KGNvbG5hbWVzKGNsZWFud2luZV90cmFpbikpCiAgICAKICAgIGNsZWFud2luZV90ZXN0IDwtIGNsZWFud2luZV90ZXN0W2xhcHBseShjbGVhbndpbmVfdGVzdCwgZnVuY3Rpb24oeCkgc3VtKGlzLm5hKHgpKSAvIGxlbmd0aCh4KSkgIDwgMC4xXQogICAgY2xlYW53aW5lX3Rlc3QgPC0gY2xlYW4oY2xlYW53aW5lX3Rlc3QpCiAgICBjbGVhbndpbmVfdGVzdCA8LSBjbGVhbndpbmVfdGVzdCAlPiUgc2VsZWN0KC10aXRsZSwgLWNvdW50cnkpCiAgICAjcHJpbnQoY29sbmFtZXMoY2xlYW53aW5lX3Rlc3QpKQogICAgCiAgICBjbGVhbndpbmVfdHJhaW4gPC0gY2xlYW53aW5lX3RyYWluICU+JSBzZWxlY3QoLVNjYWxlZF9Qb2ludHMsIC1QZXJjZW50aWxlX1BvaW50cykKICAgIHRyYWluLmRhdGEucHJpY2UgPC0gY2xlYW53aW5lX3RyYWluICU+JSBzZWxlY3QoLXByaWNlKQogICAgI3ByaW50KGNvbG5hbWVzKHRyYWluLmRhdGEucHJpY2UpKQogICAgdHJhaW4uZGF0YS5wcmljZSA8LSBzcGFyc2UubW9kZWwubWF0cml4KH4uLCB0cmFpbi5kYXRhLnByaWNlKVssLTFdCiAgICB0cmFpbi5kYXRhLnB0IDwtIGNsZWFud2luZV90cmFpbiAlPiUgc2VsZWN0KC1wb2ludHMpCiAgICB0cmFpbi5kYXRhLnB0IDwtIHNwYXJzZS5tb2RlbC5tYXRyaXgofi4sIHRyYWluLmRhdGEucHQpWywtMV0KICAgIAogICAgY2xlYW53aW5lX3Rlc3QgPC0gY2xlYW53aW5lX3Rlc3QgJT4lIHNlbGVjdCgtU2NhbGVkX1BvaW50cywgLVBlcmNlbnRpbGVfUG9pbnRzKQogICAgdGVzdC5kYXRhLnByaWNlIDwtIGNsZWFud2luZV90ZXN0ICU+JSBzZWxlY3QoLXByaWNlKQogICAgI3ByaW50KGNvbG5hbWVzKHRlc3QuZGF0YS5wcmljZSkpCiAgICB0ZXN0LmRhdGEucHJpY2UgPC0gc3BhcnNlLm1vZGVsLm1hdHJpeCh+LiwgdGVzdC5kYXRhLnByaWNlKVssLTFdCiAgICAjcHJpbnQoY29sbmFtZXModGVzdC5kYXRhLnByaWNlKSkKICAgIHRlc3QuZGF0YS5wdCA8LSBjbGVhbndpbmVfdGVzdCAlPiUgc2VsZWN0KC1wb2ludHMpCiAgICB0ZXN0LmRhdGEucHQgPC0gc3BhcnNlLm1vZGVsLm1hdHJpeCh+LiwgdGVzdC5kYXRhLnB0KVssLTFdCiAgICAKICAgICMgcmFuZG9tIGZvcmVzdAogICAgc3Vic2FtcHMgPC0gc2VxKDAuMSwgMSwgMC4xKQogICAgdHJhaW5fZXJyb3JfcHJpY2UgPC0gdmVjdG9yKCJudW1lcmljIiwgbGVuZ3RoKHNlcSgwLjEsIDEsIDAuMSkpKQogICAgdHJhaW5fZXJyb3JfcHQgPC0gdmVjdG9yKCJudW1lcmljIiwgbGVuZ3RoKHNlcSgwLjEsIDEsIDAuMSkpKQogICAgCiAgICBmb3IgKGogaW4gMTpsZW5ndGgoc2VxKDAuMSwgMSwgMC4xKSkpIHsKICAgICAgcmZfdHJhaW5fcHJpY2UgPC0geGdib29zdChkYXRhID0gdHJhaW4uZGF0YS5wcmljZSwgbGFiZWw9Y2xlYW53aW5lX3RyYWluJHByaWNlLCBtYXhfZGVwdGggPSAwLCB2ZXJib3NlID0gMCwgbnVtX3BhcmFsbGVsX3RyZWUgPSAxMDAwLCBzdWJzYW1wbGUgPSBzdWJzYW1wc1tpXSwgbnJvdW5kcyA9IDEsIGNvbHNhbXBsZV9ieWxldmVsPTAuNiwgb2JqZWN0aXZlID0gInJlZzpsaW5lYXIiLCBldmFsX21ldHJpYyA9ICdtYWUnKQogICAgICByZl90cmFpbl9wdCA8LSB4Z2Jvb3N0KGRhdGEgPSB0cmFpbi5kYXRhLnB0LCBsYWJlbD1jbGVhbndpbmVfdHJhaW4kcG9pbnRzLCBtYXhfZGVwdGggPSAwLCB2ZXJib3NlID0gMCwgbnVtX3BhcmFsbGVsX3RyZWUgPSAxMDAwLCBzdWJzYW1wbGUgPSBzdWJzYW1wc1tpXSwgbnJvdW5kcyA9IDEsIGNvbHNhbXBsZV9ieWxldmVsPTAuNiwgb2JqZWN0aXZlID0gInJlZzpsaW5lYXIiLCBldmFsX21ldHJpYyA9ICdybXNlJykKICAgICAgdHJhaW5fZXJyb3JfcHJpY2Vbal0gPC0gYXMubnVtZXJpYyhyZl90cmFpbl9wcmljZSRldmFsdWF0aW9uX2xvZ1ssMl0pCiAgICAgIHRyYWluX2Vycm9yX3B0W2pdIDwtIGFzLm51bWVyaWMocmZfdHJhaW5fcHQkZXZhbHVhdGlvbl9sb2dbLDJdKQogICAgfQogICAgaW5kZXgubWluLnByaWNlIDwtIHdoaWNoLm1pbih0cmFpbl9lcnJvcl9wcmljZSkKICAgIGluZGV4Lm1pbi5wdCA8LSB3aGljaC5taW4odHJhaW5fZXJyb3JfcHQpCiAgICAKICAgIHJmX2NsZWFud2luZV9wcmljZSA8LSB4Z2Jvb3N0KGRhdGEgPSB0cmFpbi5kYXRhLnByaWNlLCBsYWJlbD1jbGVhbndpbmVfdHJhaW4kcHJpY2UsIG1heF9kZXB0aCA9IDAsIHZlcmJvc2UgPSAwLCBudW1fcGFyYWxsZWxfdHJlZSA9IDEwMDAsIHN1YnNhbXBsZSA9IHN1YnNhbXBzW2luZGV4Lm1pbi5wcmljZV0sIG5yb3VuZHMgPSAxLCBjb2xzYW1wbGVfYnlsZXZlbD0wLjYsIG9iamVjdGl2ZSA9ICJyZWc6bGluZWFyIiwgZXZhbF9tZXRyaWMgPSAnbWFlJykKICAgICNwcmludChyZl9jbGVhbndpbmVfcHJpY2UkZmVhdHVyZV9uYW1lcykKICAgIHJmX2NsZWFud2luZV9wdCA8LSB4Z2Jvb3N0KGRhdGEgPSB0cmFpbi5kYXRhLnB0LCBsYWJlbD1jbGVhbndpbmVfdHJhaW4kcG9pbnRzLCBtYXhfZGVwdGggPSAwLCB2ZXJib3NlID0gMCwgbnVtX3BhcmFsbGVsX3RyZWUgPSAxMDAwLCBzdWJzYW1wbGUgPSBzdWJzYW1wc1tpbmRleC5taW4ucHRdLCBucm91bmRzID0gMSwgY29sc2FtcGxlX2J5bGV2ZWw9MC42LCBvYmplY3RpdmUgPSAicmVnOmxpbmVhciIsIGV2YWxfbWV0cmljID0gJ3Jtc2UnKQogICAgcHJpbnQobGVuZ3RoKHJmX2NsZWFud2luZV9wdCRmZWF0dXJlX25hbWVzKSA9PSBsZW5ndGgoY29sbmFtZXModGVzdC5kYXRhLnByaWNlKSkpCgogICAgdGVzdC5wcmVkLnByaWNlIDwtIHByZWRpY3QocmZfY2xlYW53aW5lX3ByaWNlLCB0ZXN0LmRhdGEucHJpY2UpCiAgICBwcmljZV9mb2xkX2Vycm9yW2ksIDFdIDwtIG1lYW4oYWJzKGNsZWFud2luZV90ZXN0JHByaWNlIC0gdGVzdC5wcmVkLnByaWNlKSkKICAgIHRlc3QucHJlZC5wdCA8LSBwcmVkaWN0KHJmX2NsZWFud2luZV9wdCwgdGVzdC5kYXRhLnB0KSAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAKICAgIHB0X2ZvbGRfZXJyb3JbaSwxXSA8LSBtZWFuKChjbGVhbndpbmVfdGVzdCRwcmljZSAtIHRlc3QucHJlZC5wdCleMikKICAgIAogICAgIyBzdm0KICAgIHN2bV9jbGVhbndpbmVfcHJpY2UgPC0gc3ZtKHByaWNlfi4sIGNsZWFud2luZV90cmFpbikKICAgIHN2bV9jbGVhbndpbmVfcHQgPC0gc3ZtKHBvaW50c34uLCBjbGVhbndpbmVfdHJhaW4pCiAgICByZXN1bHRfcHJpY2UgPC0gdGVzdChzdm1fY2xlYW53aW5lX3ByaWNlLCBjbGVhbndpbmVfdGVzdCkKICAgIHByaWNlX2ZvbGRfZXJyb3JbaSwyXSA8LSBtZWFuKGFicyhjbGVhbndpbmVfdGVzdCRwcmljZSAtIHJlc3VsdF9wcmljZSkpCgogICAgcmVzdWx0X3B0IDwtIHRlc3Qoc3ZtX2NsZWFud2luZV9wdCwgY2xlYW53aW5lX3Rlc3QpCiAgICBwdF9mb2xkX2Vycm9yW2ksIDJdIDwtIG1lYW4oKGNsZWFud2luZV90ZXN0JHBvaW50cyAtIHJlc3VsdF9wdCleMikKICAgIAogICAgIyBnYm0KICAgIGdibUZpdF9wcmljZSA8LSBnYm0oZm9ybXVsYSA9IHByaWNlIH4gLiwgZGF0YSA9IGNsZWFud2luZV90cmFpbiwgCiAgICAgICAgICAgICAgICAgIG4udHJlZXMgPSAxMDAwLCBzaHJpbmthZ2UgPSAwLjA1LCBpbnRlcmFjdGlvbi5kZXB0aCA9IDIsIGN2LmZvbGRzID0gMTAsIAogICAgICAgICAgICAgICAgICBkaXN0cmlidXRpb24gPSAibGFwbGFjZSIsIHZlcmJvc2UgPSBGQUxTRSkKCiAgICBiZXN0X2l0ZXJfcHJpY2UgPC0gZ2JtLnBlcmYoZ2JtRml0X3ByaWNlLCBtZXRob2QgPSAiY3YiLCBwbG90Lml0ID0gRikKICAgIGdibV9wcmljZV9wcmVkIDwtIHByZWRpY3QoZ2JtRml0X3ByaWNlLCBuZXdkYXRhID0gY2xlYW53aW5lX3Rlc3QsIG4udHJlZXMgPSBiZXN0X2l0ZXJfcHJpY2UpCiAgICBwcmljZV9mb2xkX2Vycm9yW2ksM10gPC0gbWVhbihhYnMoY2xlYW53aW5lX3Rlc3QkcHJpY2UgLSBnYm1fcHJpY2VfcHJlZCkpCiAgICAKICAgIGdibUZpdF9wdCA8LSBnYm0oZm9ybXVsYSA9IHBvaW50cyB+IC4sIGRhdGEgPSBjbGVhbndpbmVfdHJhaW4sIAogICAgICAgICAgICAgICAgICBuLnRyZWVzID0gMTAwMCwgc2hyaW5rYWdlID0gMC4wNSwgaW50ZXJhY3Rpb24uZGVwdGggPSAyLCBjdi5mb2xkcyA9IDEwLCAKICAgICAgICAgICAgICAgICAgZGlzdHJpYnV0aW9uID0gImxhcGxhY2UiLCB2ZXJib3NlID0gRkFMU0UpIAoKICAgIGJlc3RfaXRlcl9wdCA8LSBnYm0ucGVyZihnYm1GaXRfcHQsIG1ldGhvZCA9ICJjdiIsIHBsb3QuaXQgPSBGKQogICAgZ2JtX3B0X3ByZWQgPC0gcHJlZGljdChnYm1GaXRfcHQsIG5ld2RhdGEgPSBjbGVhbndpbmVfdGVzdCwgbi50cmVlcyA9IGJlc3RfaXRlcl9wdCkKICAgIHB0X2ZvbGRfZXJyb3JbaSwzXSA8LSBtZWFuKChjbGVhbndpbmVfdGVzdCRwb2ludHMgLSBnYm1fcHRfcHJlZCleMikKICAgIAogICAgIyBnbG1uZXQKICAgICMgdHJuLm10eCA8LSBtb2RlbC5tYXRyaXgofi4sY2xlYW53aW5lX3RyYWluKQogICAgIyB0cm4uc210eCA8LSBNYXRyaXgodHJuLm10eCxzcGFyc2U9VClbLC0xXQogICAgIyAKICAgICMgdHN0Lm10eCA8LSBtb2RlbC5tYXRyaXgofi4sY2xlYW53aW5lX3Rlc3QpCiAgICAjIHRzdC5zbXR4IDwtIE1hdHJpeCh0c3QubXR4LHNwYXJzZT1UKVssLTFdCiAgICAKICAgIGZpdC5sYXNzby5wcmljZSA8LSBjdi5nbG1uZXQoeD10cmFpbi5kYXRhLnByaWNlLGNsZWFud2luZV90cmFpbiRwcmljZSxhbHBoYSA9IDEsdHlwZS5tZWFzdXJlID0gIm1hZSIpIAogICAgbC5lcnIucHJpY2UgPC0gcHJlZGljdChmaXQubGFzc28ucHJpY2UsbmV3eCA9IHRlc3QuZGF0YS5wcmljZSx0eXBlID0gJ3Jlc3BvbnNlJykKICAgIHByaWNlX2ZvbGRfZXJyb3JbaSw0XSA8LSBtZWFuKGFicyhjbGVhbndpbmVfdGVzdCRwcmljZSAtIGwuZXJyLnByaWNlKSkKIAogICAgZml0LnJpZGdlLnByaWNlIDwtIGN2LmdsbW5ldCh4PXRyYWluLmRhdGEucHJpY2UsY2xlYW53aW5lX3RyYWluJHByaWNlLGFscGhhPTAsdHlwZS5tZWFzdXJlID0gIm1hZSIpCiAgICByLmVyci5wcmljZSA8LSBwcmVkaWN0KGZpdC5yaWRnZS5wcmljZSxuZXd4ID0gdGVzdC5kYXRhLnByaWNlLCB0eXBlID0gJ3Jlc3BvbnNlJykKICAgIHByaWNlX2ZvbGRfZXJyb3JbaSw1XSA8LSBtZWFuKGFicyhjbGVhbndpbmVfdGVzdCRwcmljZSAtIHIuZXJyLnByaWNlKSkKICAgIAogICAgZml0Lmxhc3NvLnB0IDwtIGN2LmdsbW5ldCh4PXRyYWluLmRhdGEucHQsY2xlYW53aW5lX3RyYWluJHBvaW50cyxhbHBoYSA9IDEsdHlwZS5tZWFzdXJlID0gIm1zZSIpIAogICAgbC5lcnIucHQgPC0gcHJlZGljdChmaXQubGFzc28ucHQsbmV3eCA9IHRlc3QuZGF0YS5wdCx0eXBlID0gJ3Jlc3BvbnNlJykKICAgIHB0X2ZvbGRfZXJyb3JbaSw0XSA8LSBtZWFuKChjbGVhbndpbmVfdGVzdCRwb2ludHMgLSBsLmVyci5wdCleMikKIAogICAgZml0LnJpZGdlLnB0IDwtIGN2LmdsbW5ldCh4PXRyYWluLmRhdGEucHQsY2xlYW53aW5lX3RyYWluJHBvaW50cyxhbHBoYT0wLHR5cGUubWVhc3VyZSA9ICJtc2UiKQogICAgci5lcnIucHQgPC0gcHJlZGljdChmaXQucmlkZ2UucHQsbmV3eCA9IHRlc3QuZGF0YS5wdCwgdHlwZSA9ICdyZXNwb25zZScpCiAgICBwdF9mb2xkX2Vycm9yW2ksNV0gPC0gbWVhbigoY2xlYW53aW5lX3Rlc3QkcG9pbnRzIC0gci5lcnIucHQpXjIpCn0KYGBgCgpgYGB7cn0KY29sbmFtZXMocHJpY2VfZm9sZF9lcnJvcikgPC0gYygicmFuZG9tX2ZvcmVzdCIsICJzdm0iLCAiZ2JtIiwgImxhc3NvIiwgInJpZGdlIikKY29sbmFtZXMocHRfZm9sZF9lcnJvcikgPC0gYygicmFuZG9tX2ZvcmVzdCIsICJzdm0iLCAiZ2JtIiwgImxhc3NvIiwgInJpZGdlIikKcHJpY2VfZm9sZF9lcnJvcgpwdF9mb2xkX2Vycm9yCmBgYAoKYGBge3J9CiMgY2xlYW5fdHJhaW4KY2xlYW5fdHJhaW4gPC0gdHJhaW5bbGFwcGx5KHRyYWluLCBmdW5jdGlvbih4KSBzdW0oaXMubmEoeCkpIC8gbGVuZ3RoKHgpKSAgPCAwLjFdCmNsZWFuX3RyYWluIDwtIGNsZWFuKGNsZWFuX3RyYWluKQpjbGVhbl90cmFpbiA8LSBjbGVhbl90cmFpbiAlPiUgc2VsZWN0KC10aXRsZSwgLWNvdW50cnksIC1wcm92aW5jZSkKY2xlYW5fdHJhaW4gPC0gY2xlYW5fdHJhaW4gJT4lIHNlbGVjdCgtU2NhbGVkX1BvaW50cywgLVBlcmNlbnRpbGVfUG9pbnRzKQpkaW0oY2xlYW5fdHJhaW4pCgojIHBpY2sgYmVzdCBtb2RlbCBhbmQgcHJlZGljdCBvbiB0ZXN0CmNsZWFuX3Rlc3QgPC0gdGVzdFtsYXBwbHkodGVzdCwgZnVuY3Rpb24oeCkgc3VtKGlzLm5hKHgpKSAvIGxlbmd0aCh4KSApIDwgMC4xXQpjbGVhbl90ZXN0IDwtIGNsZWFuKGNsZWFuX3Rlc3QpCmNsZWFuX3Rlc3RfY291bnRyeSA8LSBjbGVhbl90ZXN0JGNvdW50cnkKY2xlYW5fdGVzdF9wcm92aW5jZSA8LSBjbGVhbl90ZXN0JHByb3ZpbmNlCmNsZWFuX3Rlc3QgPC0gY2xlYW5fdGVzdCAlPiUgc2VsZWN0KC10aXRsZSwgLWNvdW50cnksIC1wcm92aW5jZSkKY2xlYW5fdGVzdCA8LSBjbGVhbl90ZXN0ICU+JSBzZWxlY3QoLVNjYWxlZF9Qb2ludHMsIC1QZXJjZW50aWxlX1BvaW50cykKZGltKGNsZWFuX3Rlc3QpCiMgdGVzdC5kYXRhLnByaWNlIDwtIHRlc3QgJT4lIHNlbGVjdCgtcHJpY2UpCiMgdGVzdC5kYXRhLnByaWNlIDwtIHNwYXJzZS5tb2RlbC5tYXRyaXgofi4sIHRlc3QuZGF0YS5wcmljZSlbLC0xXQojIHRlc3QuZGF0YS5wdCA8LSB0ZXN0ICU+JSBzZWxlY3QoLXBvaW50cykKIyB0ZXN0LmRhdGEucHQgPC0gc3BhcnNlLm1vZGVsLm1hdHJpeCh+LiwgdGVzdC5kYXRhLnB0KVssLTFdCiAgICAKc3ZtX3Rlc3RfcHJpY2UgPC0gc3ZtKHByaWNlfi4sIGNsZWFuX3RyYWluKQpzdm1fdGVzdF9wdCA8LSBzdm0ocG9pbnRzfi4sIGNsZWFuX3RyYWluKQpyZXN1bHRfdGVzdF9wcmljZSA8LSBwcmVkaWN0KHN2bV90ZXN0X3ByaWNlLCBjbGVhbl90ZXN0KQptZWFuKGFicyhjbGVhbl90ZXN0JHByaWNlIC0gcmVzdWx0X3Rlc3RfcHJpY2UpKQoKcmVzdWx0X3Rlc3RfcHQgPC0gcHJlZGljdChzdm1fdGVzdF9wdCwgY2xlYW5fdGVzdCkKbWVhbigoY2xlYW5fdGVzdCRwb2ludHMgLSByZXN1bHRfdGVzdF9wdCleMikKCmNsZWFuX3Rlc3QkY291bnRyeSA8LSBjbGVhbl90ZXN0X2NvdW50cnkKY2xlYW5fdGVzdCRwcm92aW5jZSA8LSBjbGVhbl90ZXN0X3Byb3ZpbmNlCmNsZWFuX3Rlc3QkcHJpY2VfcHJlZCA8LSByZXN1bHRfdGVzdF9wcmljZQpjbGVhbl90ZXN0JHBvaW50X3ByZWQgPC0gcmVzdWx0X3Rlc3RfcHQKCmBgYAoKYGBge3J9CndyaXRlX2NzdihjbGVhbl90ZXN0LCAiY2xlYW5fdGVzdC5jc3YiKQpgYGAKCmBgYHtyfQpwcmljZV9wdF9kZiA8LSBkYXRhLmZyYW1lKGNiaW5kKGNsZWFuX3Rlc3QkcG9pbnRzLCBjbGVhbl90ZXN0JHByaWNlLCByZXN1bHRfdGVzdF9wcmljZSwgcmVzdWx0X3Rlc3RfcHQpKQpjb2xuYW1lcyhwcmljZV9wdF9kZikgPC0gYygicG9pbnRzIiwgInByaWNlIiwgInByZWRfcHJpY2UiLCAicHJlZF9wb2ludCIpCmdncGxvdChhZXMoeD1wb2ludHMsIHk9cHJpY2UpLCBkYXRhID0gcHJpY2VfcHRfZGYpICsgZ2VvbV9qaXR0ZXIoKSArIGdlb21fcG9pbnQoYWVzKHg9cG9pbnRzLCB5PXByZWRfcHJpY2UpLCBjb2w9InJlZCIpCmdncGxvdChhZXMoeD1wcmljZSwgeT1wb2ludHMpLCBkYXRhID0gcHJpY2VfcHRfZGYpICsgZ2VvbV9qaXR0ZXIoKSArIGdlb21fcG9pbnQoYWVzKHg9cHJpY2UsIHk9cHJlZF9wb2ludCksIGNvbD0icmVkIikKYGBgCgpgYGB7cn0Kc2F2ZShjbGVhbl90cmFpbiwgY2xlYW5fdGVzdCwgcHJpY2VfZm9sZF9lcnJvciwgcHRfZm9sZF9lcnJvciwgZmlsZSA9ICJ+L0Ryb3Bib3gvRHVrZS83MDcvUHJvamVjdC83MDdXaW5lL3dpbmVfbW9kZWxzLlJEYXRhIikKYGBgCgoKYGBge3J9CiMgdGVzdApjbGVhbndpbmUgPC0gZGF0YQpjbGVhbndpbmUkbG9jYXRpb24gPC0gZmFjdG9yKGNsZWFud2luZSRsb2NhdGlvbikKY2xlYW53aW5lJHRhc3Rlcl9uYW1lIDwtIGZhY3RvcihjbGVhbndpbmUkdGFzdGVyX25hbWUpCmNsZWFud2luZSR0aXRsZSA8LSBmYWN0b3IoY2xlYW53aW5lJHRpdGxlKQpjbGVhbndpbmUkdmFyaWV0eSA8LSBmYWN0b3IoY2xlYW53aW5lJHZhcmlldHkpCmNsZWFud2luZSR0YXN0ZXJfbmFtZSA8LSBhZGROQShjbGVhbndpbmUkdGFzdGVyX25hbWUpCmNsZWFud2luZSRyZWdpb25fMSA8LSBmYWN0b3IoY2xlYW53aW5lJHJlZ2lvbl8xKQpjbGVhbndpbmUkcmVnaW9uXzIgPC0gZmFjdG9yKGNsZWFud2luZSRyZWdpb25fMikKCmNsZWFud2luZSA8LSBjbGVhbndpbmVbbGFwcGx5KGNsZWFud2luZSwgZnVuY3Rpb24oeCkgc3VtKGlzLm5hKHgpKSAvIGxlbmd0aCh4KSApIDwgMC4xXQpjbGVhbndpbmUgPC0gY2xlYW4oY2xlYW53aW5lKQpjbGVhbndpbmUgPC0gY2xlYW53aW5lICU+JSBzZWxlY3QoLXRpdGxlLCAtY291bnRyeSkKc3VtbWFyeShjbGVhbndpbmUpCgpgYGAKYGBge3J9Cgp0bXAgPSBkYXRhICMlPiUgc2VsZWN0KC1TY2FsZWRfUG9pbnRzLCAtUGVyY2VudGlsZV9Qb2ludHMpCgp0cmFpbi5pZHggPC0gc2FtcGxlKG5yb3codG1wKSwgNC81ICogbnJvdyh0bXApKQpjbGVhbndpbmVfdHJhaW4gPC0gdG1wW3RyYWluLmlkeCwgXQpjbGVhbndpbmVfdGVzdCA8LSB0bXBbLXRyYWluLmlkeCwgXQoKY2xlYW53aW5lX3RyYWluJGxvY2F0aW9uIDwtIGZhY3RvcihjbGVhbndpbmVfdHJhaW4kbG9jYXRpb24pCiAgICBjbGVhbndpbmVfdHJhaW4kdGFzdGVyX25hbWUgPC0gZmFjdG9yKGNsZWFud2luZV90cmFpbiR0YXN0ZXJfbmFtZSkKICAgIGNsZWFud2luZV90cmFpbiR0YXN0ZXJfbmFtZSA8LSBhZGROQShjbGVhbndpbmVfdHJhaW4kdGFzdGVyX25hbWUpCiAgICBjbGVhbndpbmVfdHJhaW4kdGl0bGUgPC0gZmFjdG9yKGNsZWFud2luZV90cmFpbiR0aXRsZSkKICAgIGNsZWFud2luZV90cmFpbiR2YXJpZXR5IDwtIGZhY3RvcihjbGVhbndpbmVfdHJhaW4kdmFyaWV0eSkKICAgIGNsZWFud2luZV90cmFpbiRyZWdpb25fMSA8LSBmYWN0b3IoY2xlYW53aW5lX3RyYWluJHJlZ2lvbl8xKQogICAgY2xlYW53aW5lX3RyYWluJHJlZ2lvbl8yIDwtIGZhY3RvcihjbGVhbndpbmVfdHJhaW4kcmVnaW9uXzIpCiAgICAKICAgIGNsZWFud2luZV90ZXN0JGxvY2F0aW9uIDwtIGZhY3RvcihjbGVhbndpbmVfdGVzdCRsb2NhdGlvbikKICAgIGNsZWFud2luZV90ZXN0JHRhc3Rlcl9uYW1lIDwtIGZhY3RvcihjbGVhbndpbmVfdGVzdCR0YXN0ZXJfbmFtZSkKICAgIGNsZWFud2luZV90ZXN0JHRhc3Rlcl9uYW1lIDwtIGFkZE5BKGNsZWFud2luZV90ZXN0JHRhc3Rlcl9uYW1lKQogICAgY2xlYW53aW5lX3Rlc3QkdGl0bGUgPC0gZmFjdG9yKGNsZWFud2luZV90ZXN0JHRpdGxlKQogICAgY2xlYW53aW5lX3Rlc3QkdmFyaWV0eSA8LSBmYWN0b3IoY2xlYW53aW5lX3Rlc3QkdmFyaWV0eSkKICAgIGNsZWFud2luZV90ZXN0JHJlZ2lvbl8xIDwtIGZhY3RvcihjbGVhbndpbmVfdGVzdCRyZWdpb25fMSkKICAgIGNsZWFud2luZV90ZXN0JHJlZ2lvbl8yIDwtIGZhY3RvcihjbGVhbndpbmVfdGVzdCRyZWdpb25fMikKCiAgICBjbGVhbndpbmVfdHJhaW4gPC0gY2xlYW53aW5lX3RyYWluW2xhcHBseShjbGVhbndpbmVfdHJhaW4sIGZ1bmN0aW9uKHgpIHN1bShpcy5uYSh4KSkgLyBsZW5ndGgoeCkgKSA8IDAuMV0KICAgIGNsZWFud2luZV90cmFpbiA8LSBjbGVhbihjbGVhbndpbmVfdHJhaW4pCiAgICBjbGVhbndpbmVfdHJhaW4gPC0gY2xlYW53aW5lX3RyYWluICU+JSBzZWxlY3QoLXRpdGxlLCAtY291bnRyeSkKICAgIHByaW50KGNvbG5hbWVzKGNsZWFud2luZV90cmFpbikpCiAgICAKICAgIGNsZWFud2luZV90ZXN0IDwtIGNsZWFud2luZV90ZXN0W2xhcHBseShjbGVhbndpbmVfdGVzdCwgZnVuY3Rpb24oeCkgc3VtKGlzLm5hKHgpKSAvIGxlbmd0aCh4KSApIDwgMC4xXQogICAgY2xlYW53aW5lX3Rlc3QgPC0gY2xlYW4oY2xlYW53aW5lX3Rlc3QpCiAgICBjbGVhbndpbmVfdGVzdCA8LSBjbGVhbndpbmVfdGVzdCAlPiUgc2VsZWN0KC10aXRsZSwgLWNvdW50cnkpCiAgICBwcmludChjb2xuYW1lcyhjbGVhbndpbmVfdGVzdCkpCiAgICAKICAgIGNsZWFud2luZV90cmFpbiA8LSBjbGVhbndpbmVfdHJhaW4gJT4lIHNlbGVjdCgtU2NhbGVkX1BvaW50cywgLVBlcmNlbnRpbGVfUG9pbnRzKQogICAgdHJhaW4uZGF0YS5wcmljZSA8LSBjbGVhbndpbmVfdHJhaW4gJT4lIHNlbGVjdCgtcHJpY2UpCiAgICBwcmludChjb2xuYW1lcyh0cmFpbi5kYXRhLnByaWNlKSkKICAgIHRyYWluLmRhdGEucHJpY2UgPC0gc3BhcnNlLm1vZGVsLm1hdHJpeCh+LiwgdHJhaW4uZGF0YS5wcmljZSlbLC0xXQogICAgdHJhaW4uZGF0YS5wdCA8LSBjbGVhbndpbmVfdHJhaW4gJT4lIHNlbGVjdCgtcG9pbnRzKQogICAgdHJhaW4uZGF0YS5wdCA8LSBzcGFyc2UubW9kZWwubWF0cml4KH4uLCB0cmFpbi5kYXRhLnB0KVssLTFdCiAgICAKICAgIGNsZWFud2luZV90ZXN0IDwtIGNsZWFud2luZV90ZXN0ICU+JSBzZWxlY3QoLVNjYWxlZF9Qb2ludHMsIC1QZXJjZW50aWxlX1BvaW50cykKICAgIHRlc3QuZGF0YS5wcmljZSA8LSBjbGVhbndpbmVfdGVzdCAlPiUgc2VsZWN0KC1wcmljZSkKICAgIHByaW50KGNvbG5hbWVzKHRlc3QuZGF0YS5wcmljZSkpCiAgICB0ZXN0LmRhdGEucHJpY2UgPC0gc3BhcnNlLm1vZGVsLm1hdHJpeCh+LiwgdGVzdC5kYXRhLnByaWNlKVssLTFdCiAgICBwcmludChjb2xuYW1lcyh0ZXN0LmRhdGEucHJpY2UpKQogICAgdGVzdC5kYXRhLnB0IDwtIGNsZWFud2luZV90ZXN0ICU+JSBzZWxlY3QoLXBvaW50cykKICAgIHRlc3QuZGF0YS5wdCA8LSBzcGFyc2UubW9kZWwubWF0cml4KH4uLCB0ZXN0LmRhdGEucHQpWywtMV0KYGBgCgpgYGB7cn0KIyB0bXAgPSBjbGVhbndpbmUgJT4lIHNlbGVjdCgtU2NhbGVkX1BvaW50cywgLVBlcmNlbnRpbGVfUG9pbnRzKQojIAojIHRyYWluLmlkeCA8LSBzYW1wbGUobnJvdyh0bXApLCA0LzUgKiBucm93KHRtcCkpCiMgdG1wLnRyYWluIDwtIHRtcFt0cmFpbi5pZHgsIF0KIyB0bXAudGVzdCA8LSB0bXBbLXRyYWluLmlkeCwgXQojIHRyYWluLmRhdGEucHJpY2UgPC0gdG1wLnRyYWluICU+JSBzZWxlY3QoLXByaWNlKQojIHRyYWluLmRhdGEucHJpY2UgPC0gc3BhcnNlLm1vZGVsLm1hdHJpeCh+LiwgdHJhaW4uZGF0YS5wcmljZSlbLC0xXQojIHByaW50KGNvbG5hbWVzKHRyYWluLmRhdGEucHJpY2UpKQojIHRyYWluLmRhdGEucHQgPC0gdG1wLnRyYWluICU+JSBzZWxlY3QoLXBvaW50cykKIyB0cmFpbi5kYXRhLnB0IDwtIHNwYXJzZS5tb2RlbC5tYXRyaXgofi4sIHRyYWluLmRhdGEucHQpWywtMV0KIyB0ZXN0LmRhdGEucHJpY2UgPC0gdG1wLnRlc3QgJT4lIHNlbGVjdCgtcHJpY2UpCiMgdGVzdC5kYXRhLnByaWNlIDwtIHNwYXJzZS5tb2RlbC5tYXRyaXgofi4sIHRlc3QuZGF0YS5wcmljZSlbLC0xXQojIHByaW50KGNvbG5hbWVzKHRlc3QuZGF0YS5wcmljZSkpCiMgdGVzdC5kYXRhLnB0IDwtIHRtcC50ZXN0ICU+JSBzZWxlY3QoLXBvaW50cykKIyB0ZXN0LmRhdGEucHQgPC0gc3BhcnNlLm1vZGVsLm1hdHJpeCh+LiwgdGVzdC5kYXRhLnB0KVssLTFdCgpzdWJzYW1wcyA8LSBzZXEoMC4xLCAxLCAwLjEpCmNvbHNhbXBzIDwtIHNlcSgwLjEsIDEsIDAuMSkKI3RyYWluX2Vycm9yIDwtIG1hdHJpeChOQSwgbnJvdyA9IGxlbmd0aChzZXEoMC4xLCAxLCAwLjEpKSwgbmNvbCA9IGxlbmd0aChzZXEoMC4xLCAxLCAwLjEpKSkKdHJhaW5fZXJyb3JfcHJpY2UgPC0gdmVjdG9yKCJudW1lcmljIiwgbGVuZ3RoKHNlcSgwLjEsIDEsIDAuMSkpKQp0cmFpbl9lcnJvcl9wdCA8LSB2ZWN0b3IoIm51bWVyaWMiLCBsZW5ndGgoc2VxKDAuMSwgMSwgMC4xKSkpCmZvciAoaSBpbiAxOmxlbmd0aChzZXEoMC4xLCAxLCAwLjEpKSkgewogICNmb3IgKGogaW4gMTpsZW5ndGgoc2VxKDAuMSwgMSwgMC4xKSkpIHsKICAgIHJmX2NsZWFud2luZV9wcmljZSA8LSB4Z2Jvb3N0KGRhdGEgPSB0cmFpbi5kYXRhLnByaWNlLCBsYWJlbD1jbGVhbndpbmVfdHJhaW4kcHJpY2UsIHZlcmJvc2UgPSAwLCBtYXhfZGVwdGggPSA1LCBudW1fcGFyYWxsZWxfdHJlZSA9IDEwMDAsIHN1YnNhbXBsZSA9IHN1YnNhbXBzW2ldLCBucm91bmRzID0gMSwgY29sc2FtcGxlX2J5bGV2ZWw9MC42LCBvYmplY3RpdmUgPSAicmVnOmxpbmVhciIsIGV2YWxfbWV0cmljID0gJ21hZScpCiAgICByZl9jbGVhbndpbmVfcHQgPC0geGdib29zdChkYXRhID0gdHJhaW4uZGF0YS5wdCwgbGFiZWw9Y2xlYW53aW5lX3RyYWluJHBvaW50cywgdmVyYm9zZSA9IDAsIG1heF9kZXB0aCA9IDUsIG51bV9wYXJhbGxlbF90cmVlID0gMTAwMCwgc3Vic2FtcGxlID0gc3Vic2FtcHNbaV0sIG5yb3VuZHMgPSAxLCBjb2xzYW1wbGVfYnlsZXZlbD0wLjYsIG9iamVjdGl2ZSA9ICJyZWc6bGluZWFyIiwgZXZhbF9tZXRyaWMgPSAncm1zZScpCiAgICAjdHJhaW5fZXJyb3JbaSxqXSA8LSBhcy5udW1lcmljKHJmX2NsZWFud2luZSRldmFsdWF0aW9uX2xvZ1ssMl0pCiAgICB0cmFpbl9lcnJvcl9wcmljZVtpXSA8LSBhcy5udW1lcmljKHJmX2NsZWFud2luZV9wcmljZSRldmFsdWF0aW9uX2xvZ1ssMl0pCiAgICB0cmFpbl9lcnJvcl9wdFtpXSA8LSBhcy5udW1lcmljKHJmX2NsZWFud2luZV9wdCRldmFsdWF0aW9uX2xvZ1ssMl0pCiAgI30KfQptaW4uaW5kZXgucHJpY2UgPC0gd2hpY2godHJhaW5fZXJyb3JfcHJpY2UgPT0gbWluKHRyYWluX2Vycm9yX3ByaWNlKSwgYXJyLmluZCA9IFRSVUUpCm1pbi5pbmRleC5wdCA8LSB3aGljaCh0cmFpbl9lcnJvcl9wdCA9PSBtaW4odHJhaW5fZXJyb3JfcHQpLCBhcnIuaW5kID0gVFJVRSkKcmZfY2xlYW53aW5lX3ByaWNlIDwtIHhnYm9vc3QoZGF0YSA9IHRyYWluLmRhdGEucHJpY2UsIGxhYmVsPWNsZWFud2luZV90cmFpbiRwcmljZSwgbWF4X2RlcHRoID0gMCwgdmVyYm9zZSA9IDAsIG51bV9wYXJhbGxlbF90cmVlID0gMTAwMCwgc3Vic2FtcGxlID0gc3Vic2FtcHNbbWluLmluZGV4LnByaWNlXSwgbnJvdW5kcyA9IDEsIGNvbHNhbXBsZV9ieWxldmVsPTAuNiwgb2JqZWN0aXZlID0gInJlZzpsaW5lYXIiLCBldmFsX21ldHJpYyA9ICdtYWUnKQpwcmludChyZl9jbGVhbndpbmVfcHJpY2UkZmVhdHVyZV9uYW1lcykKcmZfY2xlYW53aW5lX3B0IDwtIHhnYm9vc3QoZGF0YSA9IHRyYWluLmRhdGEucHQsIGxhYmVsPWNsZWFud2luZV90cmFpbiRwb2ludHMsIG1heF9kZXB0aCA9IDAsIHZlcmJvc2UgPSAwLCBudW1fcGFyYWxsZWxfdHJlZSA9IDEwMDAsIHN1YnNhbXBsZSA9IHN1YnNhbXBzW21pbi5pbmRleC5wdF0sIG5yb3VuZHMgPSAxLCBjb2xzYW1wbGVfYnlsZXZlbD0wLjYsIG9iamVjdGl2ZSA9ICJyZWc6bGluZWFyIiwgZXZhbF9tZXRyaWMgPSAncm1zZScpCgp0ZXN0LnByZWQucHJpY2UgPC0gcHJlZGljdChyZl9jbGVhbndpbmVfcHJpY2UsIHRlc3QuZGF0YS5wcmljZSkKbWVhbihhYnModGVzdC5wcmVkLnByaWNlIC0gdG1wLnRlc3QkcHJpY2UpKQoKdGVzdC5wcmVkLnB0IDwtIHByZWRpY3QocmZfY2xlYW53aW5lX3B0LCB0ZXN0LmRhdGEucHQpCm1lYW4oKHRlc3QucHJlZC5wdCAtIHRtcC50ZXN0JHBvaW50cyleMikKYGBgCgpgYGB7cn0Kd2hpY2godHJhaW5fZXJyb3IgPT0gbWluKHRyYWluX2Vycm9yKSwgYXJyLmluZCA9IFRSVUUpCmBgYApgYGB7cn0KI2dhbW1hbGlzdCA8LSBjKDAuMDA1LDAuMDEsMC4wMTUsMC4wMiwwLjAyNSwwLjAzLDAuMDM1LDAuMDQsMC4wNDUsMC4wNSkKI3R1bmUub3V0IDwtIHR1bmUuc3ZtKHRtcC5kYXRhLCB0bXAkcHJpY2UsIAogICAgICAgICAgICAgICAgICNrZXJuZWw9J2xpbmVhcicsIGNvc3Q9Ml4oLTE6NSksIGdhbW1hID0gZ2FtbWFsaXN0KQojc3VtbWFyeSh0dW5lLm91dCkKCnN2bV9jbGVhbndpbmVfcHJpY2UgPC0gc3ZtKHByaWNlfi4sIHRtcC50cmFpbikKc3ZtX2NsZWFud2luZV9wcmljZQoKc3ZtX2NsZWFud2luZV9wdCA8LSBzdm0ocG9pbnRzfi4sIHRtcC50cmFpbikKc3ZtX2NsZWFud2luZV9wdApgYGAKYGBge3J9CnJlc3VsdF9wcmljZSA8LSB0ZXN0KHN2bV9jbGVhbndpbmVfcHJpY2UsIHRtcC50ZXN0KQptZWFuKGFicyh0bXAkcHJpY2UgLSByZXN1bHRfcHJpY2UpKQoKcmVzdWx0X3B0IDwtIHRlc3Qoc3ZtX2NsZWFud2luZV9wdCwgdG1wLnRlc3QpCm1lYW4oKHRtcCRwb2ludHMgLSByZXN1bHRfcHQpXjIpCmBgYAoKCgpgYGB7cn0KIyMgU3BsaXQgaW4gdHJhaW5pbmcgYW5kIHRlc3QgZGF0YQp0cmFpbi5pZHggPC0gc2FtcGxlKG5yb3coaXJpcyksIDIvMyAqIG5yb3coaXJpcykpCmlyaXMudHJhaW4gPC0gaXJpc1t0cmFpbi5pZHgsIF0KaXJpcy50ZXN0IDwtIGlyaXNbLXRyYWluLmlkeCwgXQoKIyMgUnVuIGNhc2Utc3BlY2lmaWMgUkYKY3NyZihTcGVjaWVzIH4gLiwgdHJhaW5pbmdfZGF0YSA9IGlyaXMudHJhaW4sIHRlc3RfZGF0YSA9IGlyaXMudGVzdCwgCiAgICAgcGFyYW1zMSA9IGxpc3QobnVtLnRyZWVzID0gNTAsIG10cnkgPSA0KSwgCiAgICAgcGFyYW1zMiA9IGxpc3QobnVtLnRyZWVzID0gNSkpCmBgYAoKCmBgYHtyfQojaWR4ID0gc2FtcGxlKDE6OTAwKQp0bXBfdHJhaW4gPSB0bXBbMTo5MDAsIF0KdG1wX3Rlc3QgID0gdG1wWzkwMToxMDAwLCBdCgpyZl9jbGVhbndpbmUgPC0gY3NyZihzcGFyc2UubW9kZWwubWF0cml4KHRtcF90cmFpbiAlPiUgc2VsZWN0KC1wcmljZSkpLCB0bXBfdHJhaW4kcHJpY2UsIAogICAgICAgICAgICAgICAgICAgICBwYXJhbXMxID0gbGlzdChudW0udHJlZXMgPSA1MDAsIG10cnkgPSA0KSwgCiAgICAgICAgICAgICAgICAgICAgIHBhcmFtczIgPSBsaXN0KG51bS50cmVlcyA9IDUwLCBtdHJ5ID0gNCkpCmBgYAoKCmBgYHtyfQpyZl9jbGVhbndpbmUgPC0gY3NyZihwcmljZSB+IC4sIGNsZWFud2luZSAlPiUgc2VsZWN0KC1TY2FsZWRfUG9pbnRzKSwgcGFyYW1zMSA9IGxpc3QobnVtLnRyZWVzID0gNTAsIG10cnkgPSA0KSkKYGBgCgoKCmBgYHtyfQojIGdibQpnYm1GaXQgPC0gZ2JtKGZvcm11bGEgPSBwcmljZSB+IC4sIGRhdGEgPSBjbGVhbndpbmVfcHJpY2VfMTBwZXJjICU+JSBzZWxlY3QoLXRpdGxlLCAtd2luZXJ5KSwgCiAgICAgICAgICAgICAgbi50cmVlcyA9IDEwMDAsIHNocmlua2FnZSA9IDAuMDEsIGludGVyYWN0aW9uLmRlcHRoID0gMiwgY3YuZm9sZHMgPSAxMCwgCiAgICAgICAgICAgICAgZGlzdHJpYnV0aW9uID0gImdhdXNzaWFuIikKCmJlc3RfaXRlciA8LSBnYm0ucGVyZihnYm1GaXQsIG1ldGhvZCA9ICJjdiIpCgojIFBlcmZvcm1hbmNlIG9uIHdob2xlIGRhdGFzZXQKCmRpZmZfc3F1YXJlZCA8LSAoY2xlYW53aW5lX3ByaWNlXzEwcGVyYyRwcmljZSAtIAogICAgcHJlZGljdChnYm1GaXQsIG5ld2RhdGEgPSBjbGVhbndpbmVfcHJpY2VfMTBwZXJjLCBuLnRyZWVzID0gYmVzdF9pdGVyKSleMgoKbWVhbihkaWZmX3NxdWFyZWQsIG5hLnJtID0gVFJVRSkKCmBgYApgYGB7cn0KIyByYW5nZXIKY3NyZl93aW5lIDwtIGNzcmYoZm9ybXVsYSA9IHByaWNlIH4gLiwgdHJhaW5pbmdfZGF0YSA9IGNsZWFud2luZV9wcmljZV90cmFpbiwgdGVzdF9kYXRhID0gY2xlYW53aW5lX3ByaWNlX3Rlc3QsIHBhcmFtczEgPSBsaXN0KG51bS50cmVlcyA9IDUwLCBtdHJ5ID0gNCksIHBhcmFtczIgPSBsaXN0KG51bS50cmVlcyA9IDUpKQpjc3JmX3dpbmUKYGBgCgoKYGBge3J9CiMgcmFuZG9tIGZvcmVzdApkYXQuY2xlYW53aW5lIDwtIHNwYXJzZS5tb2RlbC5tYXRyaXgofi4sIGRhdGEgPSBjbGVhbndpbmVfcHJpY2VfMTBwZXJjICU+JSBzZWxlY3QoLXRpdGxlLCAtd2luZXJ5KSlbLC0xXQpwcmljZSA8LSBjbGVhbndpbmVfcHJpY2VfMTBwZXJjJHByaWNlCgpyZl93aW5lIDwtIHJhbmRvbUZvcmVzdChhcy5tYXRyaXgoZGF0LmNsZWFud2luZSksIHByaWNlLCBkby50cmFjZSA9IEYsIAogICAgICAgICAgICAgICAgICAgICAgICAgIGltcG9ydGFuY2UgPSBUKSAKcmZfd2luZQojb29iX3dpbmUgPC0gbWVhbihwcmljZSAtIHJmX3dpbmUkdm90ZXNbLDJdKV4yKQpgYGAKCg==